feat: Add CUDA connected components & track building #4015

Open · wants to merge 6 commits into main

Changes from 5 commits
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
@@ -159,7 +159,7 @@ test_exatrkx_unittests:
     - git checkout $HEAD_SHA
     - source CI/dependencies.sh
     - cd ..
-    - ctest --test-dir build -R ExaTrkX
+    - ctest --test-dir build -R "(ExaTrkX|CudaConnectedComponents)"

 test_exatrkx_python:
   stage: test
20 changes: 20 additions & 0 deletions Examples/Python/src/ExaTrkXTrackFinding.cpp
@@ -7,6 +7,7 @@
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

#include "Acts/Plugins/ExaTrkX/BoostTrackBuilding.hpp"
#include "Acts/Plugins/ExaTrkX/CudaTrackBuilding.hpp"
#include "Acts/Plugins/ExaTrkX/ExaTrkXPipeline.hpp"
#include "Acts/Plugins/ExaTrkX/OnnxEdgeClassifier.hpp"
#include "Acts/Plugins/ExaTrkX/OnnxMetricLearning.hpp"
@@ -112,6 +113,25 @@ void addExaTrkXTrackFinding(Context &ctx) {
}
#endif

#ifdef ACTS_EXATRKX_WITH_CUDA
  {
    using Alg = Acts::CudaTrackBuilding;
    using Config = Alg::Config;

    auto alg = py::class_<Alg, Acts::TrackBuildingBase, std::shared_ptr<Alg>>(
                   mex, "CudaTrackBuilding")
                   .def(py::init([](const Config &c, Logging::Level lvl) {
                          return std::make_shared<Alg>(
                              c, getDefaultLogger("TrackBuilding", lvl));
                        }),
                        "config"_a, "level"_a);

    auto c = py::class_<Config>(alg, "Config").def(py::init<>());
    ACTS_PYTHON_STRUCT_BEGIN(c, Config);
    ACTS_PYTHON_STRUCT_END();
  }
#endif

#ifdef ACTS_EXATRKX_ONNX_BACKEND
  {
    using Alg = Acts::OnnxMetricLearning;
1 change: 1 addition & 0 deletions Plugins/ExaTrkX/CMakeLists.txt
@@ -2,6 +2,7 @@ add_library(ActsPluginExaTrkX SHARED src/buildEdges.cpp src/ExaTrkXPipeline.cpp)

if(ACTS_EXATRKX_ENABLE_CUDA)
    target_compile_definitions(ActsPluginExaTrkX PUBLIC ACTS_EXATRKX_WITH_CUDA)
    target_sources(ActsPluginExaTrkX PRIVATE src/CudaTrackBuilding.cu)
endif()

if(ACTS_EXATRKX_ENABLE_ONNX)
44 changes: 44 additions & 0 deletions Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/CudaTrackBuilding.hpp
@@ -0,0 +1,44 @@
// This file is part of the ACTS project.
//
// Copyright (C) 2016 CERN for the benefit of the ACTS project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

#pragma once

#include "Acts/Plugins/ExaTrkX/Stages.hpp"
#include "Acts/Utilities/Logger.hpp"

#include <memory>

#include <torch/script.h>

namespace Acts {

class CudaTrackBuilding final : public Acts::TrackBuildingBase {
 public:
  struct Config {
    // nothing yet
  };

  CudaTrackBuilding(const Config &cfg, std::unique_ptr<const Logger> logger)
      : m_cfg(cfg),
        m_logger(std::move(logger)),
        m_device(torch::Device(torch::kCUDA)) {}

Comment on lines +26 to +29
💡 Codebase verification

⚠️ Potential issue

Hmmmm, wisdom in error handling I sense, but better approach we must take.

Follow the ways of TorchMetricLearning and TorchEdgeClassifier, you should. Device ID in configuration and ACTS_WARNING they use, yes.

-        m_device(torch::Device(torch::kCUDA)) {}
+        m_device([&]() {
+          if (cfg.deviceID >= 0) {
+            m_device = torch::Device(torch::kCUDA, cfg.deviceID);
+          } else {
+            ACTS_WARNING("GPU device " << cfg.deviceID << " not available");
+            throw std::runtime_error("Invalid CUDA device requested");
+          }
+          return m_device;
+        }()) {}
  • Add deviceID to Config struct, you must
  • Consistent with codebase patterns, this approach is
🔗 Analysis chain

Handle CUDA device initialization gracefully, young padawan must.

In constructor, catch CUDA errors you should, when device initialization fails it might. Use CUDA_CHECK macro from CudaUtils.cuh, wisdom this would be.

-        m_device(torch::Device(torch::kCUDA)) {}
+        m_device([&]() {
+          CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
+          if (deviceCount == 0) {
+            throw std::runtime_error("No CUDA devices available");
+          }
+          return torch::Device(torch::kCUDA);
+        }()) {}

  std::vector<std::vector<int>> operator()(
      std::any nodes, std::any edges, std::any edge_weights,
      std::vector<int> &spacepointIDs,
      const ExecutionContext &execContext = {}) override;

  torch::Device device() const override { return m_device; };

 private:
  Config m_cfg;
  std::unique_ptr<const Acts::Logger> m_logger;
  torch::Device m_device;

  const auto &logger() const { return *m_logger; }
};

} // namespace Acts
173 changes: 173 additions & 0 deletions Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/ConnectedComponents.cuh
@@ -0,0 +1,173 @@
// This file is part of the ACTS project.
//
// Copyright (C) 2016 CERN for the benefit of the ACTS project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

#pragma once

#include "Acts/Plugins/ExaTrkX/detail/CudaUtils.cuh"

#include <thrust/execution_policy.h>
#include <thrust/scan.h>

#include <cassert>

namespace Acts::detail {

template <typename T>
__device__ void swap(T &a, T &b) {
  T tmp = a;
  a = b;
  b = tmp;
}
Comment on lines +18 to +23

Member
I don't know what the type of TEdge is, but if it is big you might want to implement this using moves.

Member Author
Actually, since this is a __device__ function, is there any realistic scenario where an object used on device has a move constructor? I naively would not expect this, but I don't have a lot of experience here...
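For context on the move question above: a minimal sketch of a move-based device swap, assuming libcu++ and its <cuda/std/utility> header are available; the name swapMove is made up here and this is not part of the PR. For the trivially copyable integer labels used in this file it compiles to the same code as the copy-based swap.

#include <cuda/std/utility>

// Sketch only: a move-based swap usable in __device__ code via libcu++.
// For POD types (such as the integer edge/label types below) the moves
// degenerate to copies, so the copy-based version above is equivalent.
template <typename T>
__device__ void swapMove(T &a, T &b) {
  T tmp = cuda::std::move(a);
  a = cuda::std::move(b);
  b = cuda::std::move(tmp);
}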


/// Implementation of the FastSV algorithm as shown in
/// https://arxiv.org/abs/1910.05971
template <typename TEdge, typename TLabel>
__global__ void labelConnectedComponents(std::size_t numEdges,
                                         const TEdge *sourceEdges,
                                         const TEdge *targetEdges,
                                         std::size_t numNodes, TLabel *labels,
                                         TLabel *labelsNext) {
  // Currently this kernel works only with 1 block
  assert(gridDim.x == 1);

  for (std::size_t i = threadIdx.x; i < numNodes; i += blockDim.x) {
    labels[i] = i;
    labelsNext[i] = i;
  }

  __shared__ bool changed;

  do {
    changed = false;
    __syncthreads();

    // printf("Iteration %i\n", n);

    // Tree hooking for each edge
    for (std::size_t i = threadIdx.x; i < numEdges; i += blockDim.x) {
      auto u = sourceEdges[i];
      auto v = targetEdges[i];

      if (labels[u] == labels[labels[u]] && labels[v] < labels[u]) {
        labelsNext[labels[u]] = labels[v];
        changed = true;
        // printf("Edge (%i,%i): set labelsNext[%i] = labels[%i] = %i\n", u, v, labels[u], v, labels[v]);
      } else if (labels[v] == labels[labels[v]] && labels[u] < labels[v]) {
        labelsNext[labels[v]] = labels[u];
        changed = true;
        // printf("Edge (%i,%i): set labelsNext[%i] = labels[%i] = %i\n", u, v, labels[v], u, labels[u]);
      } else {
        // printf("Edge (%i,%i): no action\n", u, v);
      }
    }
    __syncthreads();

    for (std::size_t i = threadIdx.x; i < numNodes; i += blockDim.x) {
      labels[i] = labelsNext[i];
    }

    /*if(threadIdx.x == 0) {
      for(int i=0; i<numNodes; ++i) {
        printf("Vertex %i - label %i\n", i, labels[i]);
      }
    }*/

    // Shortcutting
    for (std::size_t i = threadIdx.x; i < numNodes; i += blockDim.x) {
      if (labels[i] != labels[labels[i]]) {
        labelsNext[i] = labels[labels[i]];
        // printf("Vertex %i: labelsNext[%i] = labels[%i] = %i\n", i, i, labels[i], labels[labels[i]]);
        changed = true;
      }
    }

    for (std::size_t i = threadIdx.x; i < numNodes; i += blockDim.x) {
      labels[i] = labelsNext[i];
    }

    /*if(threadIdx.x == 0) {
      for(int i=0; i<numNodes; ++i) {
        printf("Vertex after shortcutting %i - label %i\n", i, labels[i]);
      }
    }*/

    __syncthreads();
  } while (changed == true);
}
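As a reading aid (not part of the PR), the same hooking and shortcutting iteration written as a serial host function; the double buffering needed for thread safety in the kernel is dropped, and the name fastSvReference is made up for this sketch.

#include <cstddef>
#include <vector>

// Serial reference of the FastSV iteration above: hook the root of the
// larger label onto the smaller label, then shortcut every vertex to its
// grandparent, until nothing changes. Labels converge to the smallest
// node index of each connected component.
std::vector<int> fastSvReference(std::size_t numNodes,
                                 const std::vector<int> &src,
                                 const std::vector<int> &tgt) {
  std::vector<int> labels(numNodes);
  for (std::size_t i = 0; i < numNodes; ++i) {
    labels[i] = static_cast<int>(i);
  }

  bool changed = true;
  while (changed) {
    changed = false;

    // Tree hooking for each edge
    for (std::size_t e = 0; e < src.size(); ++e) {
      const int u = src[e];
      const int v = tgt[e];
      if (labels[u] == labels[labels[u]] && labels[v] < labels[u]) {
        labels[labels[u]] = labels[v];
        changed = true;
      } else if (labels[v] == labels[labels[v]] && labels[u] < labels[v]) {
        labels[labels[v]] = labels[u];
        changed = true;
      }
    }

    // Shortcutting
    for (std::size_t i = 0; i < numNodes; ++i) {
      if (labels[i] != labels[labels[i]]) {
        labels[i] = labels[labels[i]];
        changed = true;
      }
    }
  }
  return labels;
}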

template <typename T>
__global__ void makeLabelMask(std::size_t nLabels, const T *labels,
                              T *labelMask) {
  std::size_t i = threadIdx.x + blockDim.x * blockIdx.x;

  if (i >= nLabels) {
    return;
  }

  labelMask[labels[i]] = 1;
}

template <typename T>
__global__ void mapEdgeLabels(std::size_t nLabels, T *labels,
                              const T *mapping) {
  std::size_t i = threadIdx.x + blockDim.x * blockIdx.x;

  if (i >= nLabels) {
    return;
  }

  labels[i] = mapping[labels[i]];
}

template <typename TEdges, typename TLabel>
TLabel connectedComponentsCuda(std::size_t nEdges, const TEdges *sourceEdges,
                               const TEdges *targetEdges, std::size_t nNodes,
                               TLabel *labels, cudaStream_t stream) {
  TLabel *tmpMemory;
  ACTS_CUDA_CHECK(cudaMallocAsync(&tmpMemory, nNodes * sizeof(TLabel), stream));

  // Run in a single block, so that no inter-block synchronization is necessary
  dim3 blockDim = 1024;
  labelConnectedComponents<<<1, blockDim, 1, stream>>>(
      nEdges, sourceEdges, targetEdges, nNodes, labels, tmpMemory);
  ACTS_CUDA_CHECK(cudaGetLastError());

  // Assume we have the following components:
  // 0 3 5 3 0 0

  // Fill a mask which labels survived the connected components algorithm
  // 0 1 2 3 4 5
  // 1 0 0 1 0 1
  ACTS_CUDA_CHECK(
      cudaMemsetAsync(tmpMemory, 0, nNodes * sizeof(TLabel), stream));
  dim3 gridDim = (nNodes + blockDim.x - 1) / blockDim.x;
  makeLabelMask<<<gridDim, blockDim, 0, stream>>>(nNodes, labels, tmpMemory);
  ACTS_CUDA_CHECK(cudaGetLastError());

  // Exclusive prefix sum on the label mask
  // 0 1 2 3 4 5
  // 0 1 1 1 2 2
  thrust::exclusive_scan(thrust::device.on(stream), tmpMemory,
                         tmpMemory + nNodes, tmpMemory);

  // Remap edge labels with values in prefix sum
  // 0 -> 0, 3 -> 1, 5 -> 2
  mapEdgeLabels<<<gridDim, blockDim, 0, stream>>>(nNodes, labels, tmpMemory);
  ACTS_CUDA_CHECK(cudaGetLastError());

  TLabel nLabels;
  ACTS_CUDA_CHECK(cudaMemcpyAsync(&nLabels, &tmpMemory[nNodes - 1],
                                  sizeof(TLabel), cudaMemcpyDeviceToHost,
                                  stream));

  ACTS_CUDA_CHECK(cudaFreeAsync(tmpMemory, stream));
  ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));

  return nLabels;
}

} // namespace Acts::detail
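To make the mask, exclusive-scan and remap steps above concrete, here is a host-side walk-through of the same compaction (a sketch, not part of the PR; compactLabels is a made-up name and std::exclusive_scan requires C++17).

#include <cstddef>
#include <numeric>  // std::exclusive_scan
#include <vector>

// Host-side illustration of the compaction in connectedComponentsCuda:
// labels {0,3,5,3,0,0} -> mask {1,0,0,1,0,1} -> exclusive scan {0,1,1,1,2,2}
// -> remapped labels {0,1,2,1,0,0}, i.e. three distinct components.
std::vector<int> compactLabels(std::vector<int> labels) {
  const std::size_t n = labels.size();

  std::vector<int> mask(n, 0);
  for (std::size_t i = 0; i < n; ++i) {
    mask[labels[i]] = 1;  // makeLabelMask
  }

  std::vector<int> prefix(n, 0);
  std::exclusive_scan(mask.begin(), mask.end(), prefix.begin(), 0);

  for (std::size_t i = 0; i < n; ++i) {
    labels[i] = prefix[labels[i]];  // mapEdgeLabels
  }
  return labels;
}

Note that on this example the last element of the exclusive scan is 2 while there are three components; the scan's last element misses the highest label whenever that label survives the mask, which may be related to the unconditional numberLabels += 1 adjustment discussed further down.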
32 changes: 32 additions & 0 deletions Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/CudaUtils.cuh
@@ -0,0 +1,32 @@
// This file is part of the ACTS project.
//
// Copyright (C) 2016 CERN for the benefit of the ACTS project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

#pragma once

#include <sstream>
#include <stdexcept>

#include <cuda_runtime_api.h>

namespace Acts::detail {

inline void cudaAssert(cudaError_t code, const char *file, int line) {
  if (code != cudaSuccess) {
    std::stringstream ss;
    ss << "CUDA error: " << cudaGetErrorString(code) << ", " << file << ":"
       << line;
    throw std::runtime_error(ss.str());
  }
  cudaDeviceSynchronize();
}

} // namespace Acts::detail

#define ACTS_CUDA_CHECK(ans) \
do { \
Acts::detail::cudaAssert((ans), __FILE__, __LINE__); \
} while (0)
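For illustration only (not part of the PR), a typical use of the macro, assuming this header is included; the function name and buffer size are arbitrary.

#include <cuda_runtime_api.h>

// Any call returning cudaError_t can be wrapped; on failure cudaAssert
// throws std::runtime_error carrying the file and line of the call site.
inline void exampleDeviceAllocation() {
  float *devBuf = nullptr;
  ACTS_CUDA_CHECK(cudaMalloc(&devBuf, 1024 * sizeof(float)));
  ACTS_CUDA_CHECK(cudaMemset(devBuf, 0, 1024 * sizeof(float)));
  ACTS_CUDA_CHECK(cudaFree(devBuf));
}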
71 changes: 71 additions & 0 deletions Plugins/ExaTrkX/src/CudaTrackBuilding.cu
@@ -0,0 +1,71 @@
// This file is part of the ACTS project.
//
// Copyright (C) 2016 CERN for the benefit of the ACTS project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

#include "Acts/Plugins/ExaTrkX/CudaTrackBuilding.hpp"
#include "Acts/Plugins/ExaTrkX/detail/ConnectedComponents.cuh"
#include "Acts/Plugins/ExaTrkX/detail/CudaUtils.cuh"
#include "Acts/Utilities/Zip.hpp"

#include <c10/cuda/CUDAGuard.h>
#include <c10/cuda/CUDAStream.h>
#include <torch/torch.h>

namespace Acts {

std::vector<std::vector<int>> CudaTrackBuilding::operator()(
    std::any /*nodes*/, std::any edges, std::any weights,
    std::vector<int>& spacepointIDs, const ExecutionContext& execContext) {

Member
This vector can be const.

Member Author
I think there is some dumb reason that the ONNX runtime accepts only mutable pointers or so... Probably in that case it would be better to just copy the data, but I wouldn't touch it in this PR.

Member Author
Ah, but this is graph building... so indeed it could be const.

  ACTS_VERBOSE("Start CUDA track building");
  c10::cuda::CUDAStreamGuard guard(execContext.stream.value());

  const auto edgeTensor = std::any_cast<torch::Tensor>(edges).to(torch::kCUDA);
  assert(edgeTensor.size(0) == 2);

  const auto numSpacepoints = spacepointIDs.size();
  const auto numEdges = static_cast<std::size_t>(edgeTensor.size(1));

  if (numEdges == 0) {
    ACTS_WARNING("No edges remained after edge classification");
    return {};
  }

  auto stream = execContext.stream->stream();

  auto cudaSrcPtr = edgeTensor.data_ptr<std::int64_t>();
  auto cudaTgtPtr = edgeTensor.data_ptr<std::int64_t>() + numEdges;

  int* cudaLabels;
  ACTS_CUDA_CHECK(
      cudaMallocAsync(&cudaLabels, numSpacepoints * sizeof(int), stream));

  std::size_t numberLabels = detail::connectedComponentsCuda(
      numEdges, cudaSrcPtr, cudaTgtPtr, numSpacepoints, cudaLabels, stream);

  // TODO not sure why there is an issue that is not detected in the unit tests
  numberLabels += 1;
Comment on lines +49 to +50
⚠️ Potential issue

Investigate the TODO comment, you must.

An issue not detected in unit tests, the increment of 'numberLabels' suggests. Rather than adjusting manually, find and fix the root cause, we should.

Assist you, can I. Help investigate this issue or open a new GitHub issue, would you like?


  std::vector<int> trackLabels(numSpacepoints);
  ACTS_CUDA_CHECK(cudaMemcpyAsync(trackLabels.data(), cudaLabels,
                                  numSpacepoints * sizeof(int),
                                  cudaMemcpyDeviceToHost, stream));
  ACTS_CUDA_CHECK(cudaFreeAsync(cudaLabels, stream));
  ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));
  ACTS_CUDA_CHECK(cudaGetLastError());

  ACTS_VERBOSE("Found " << numberLabels << " track candidates");

  std::vector<std::vector<int>> trackCandidates(numberLabels);

  for (const auto [label, id] : Acts::zip(trackLabels, spacepointIDs)) {
    trackCandidates[label].push_back(id);
  }

  return trackCandidates;
}

} // namespace Acts
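The zip-based grouping at the end of the function is equivalent to the plain index loop below (illustrative sketch; groupByLabel is a made-up name). For example, labels {0, 1, 0, 2} with spacepoint IDs {10, 11, 12, 13} yield the candidates {{10, 12}, {11}, {13}}.

#include <cstddef>
#include <vector>

// Spacepoints that received the same connected-component label end up
// in the same track candidate.
std::vector<std::vector<int>> groupByLabel(const std::vector<int> &labels,
                                           const std::vector<int> &ids,
                                           std::size_t numLabels) {
  std::vector<std::vector<int>> candidates(numLabels);
  for (std::size_t i = 0; i < labels.size(); ++i) {
    candidates[labels[i]].push_back(ids[i]);
  }
  return candidates;
}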
1 change: 1 addition & 0 deletions Tests/UnitTests/Plugins/ExaTrkX/CMakeLists.txt
@@ -4,3 +4,4 @@ add_unittest(ExaTrkXTensorConversion ExaTrkXTensorConversionTests.cpp)
add_unittest(ExaTrkXEdgeBuilding ExaTrkXEdgeBuildingTest.cpp)
add_unittest(ExaTrkXBoostTrackBuilding ExaTrkXBoostTrackBuildingTests.cpp)
add_unittest(ExaTrkXMetricHookTests ExaTrkXMetricHookTests.cpp)
add_unittest(ConnectedComponentsCuda ConnectedComponentCudaTests.cu)