Skip to content

Commit

Permalink
Electrons split
Browse files Browse the repository at this point in the history
  • Loading branch information
JuanGonzalezCaminero committed Sep 24, 2024
1 parent 0170e64 commit fa8ba18
Show file tree
Hide file tree
Showing 8 changed files with 719 additions and 150 deletions.
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ set(CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE ON)
# Options
option(ADEPT_USE_SURF "Enable surface model navigation on GPU" OFF)
option(ADEPT_USE_SURF_SINGLE "Use surface model in single precision" OFF)
option(USE_SPLIT_KERNELS "Run split version of the transport kernels" OFF)
option(DEBUG_SINGLE_THREAD "Run transport kernels in single thread mode" OFF)
option(WITH_FLUCT "Switch on the energy loss fluctuations" OFF)

Expand Down Expand Up @@ -83,6 +84,11 @@ endif()
if(NOT TARGET VecGeom::vgdml)
message(FATAL_ERROR "AdePT requires VecGeom compiled with GDML support")
endif()
# Run split kernels
if (USE_SPLIT_KERNELS)
add_compile_definitions(USE_SPLIT_KERNELS)
message(STATUS "${Green}AdePT will run with split kernels${ColorReset}")
endif()
# Debugging in single-thread mode
if (DEBUG_SINGLE_THREAD)
add_compile_definitions("$<$<CONFIG:Debug>:DEBUG_SINGLE_THREAD>")
Expand Down Expand Up @@ -153,8 +159,6 @@ if(NOT WITH_FLUCT)
add_compile_definitions(NOFLUCTUATION)
endif()

# string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")

#----------------------------------------------------------------------------#
# Build Targets
#----------------------------------------------------------------------------#
Expand Down
2 changes: 0 additions & 2 deletions examples/Example1/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ configure_file("macros/example1_ttbar_noadept.mac.in" "${PROJECT_BINARY_DIR}/exa

# Tests

string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")

add_test(NAME example1
COMMAND $<TARGET_FILE:example1> -m ${PROJECT_BINARY_DIR}/example1_large_stack.mac
)
4 changes: 2 additions & 2 deletions examples/Example1/macros/example1_ttbar_LHCb.mac.in
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
## Threshold for buffering tracks before sending to GPU
/adept/setTransportBufferThreshold 2000
## Total number of GPU track slots (not per thread)
/adept/setMillionsOfTrackSlots 8
/adept/setMillionsOfTrackSlots 4
/adept/setMillionsOfHitSlots 1
## Device stack limit
/adept/setCUDAStackLimit 4096
# /adept/setCUDAStackLimit 4096

# If true, particles are transported on the GPU across the whole geometry, GPU regions are ignored
/adept/setTrackInAllRegions true
Expand Down
88 changes: 65 additions & 23 deletions include/AdePT/core/AdePTTransport.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
#include <AdePT/base/Atomic.h>
#include <AdePT/navigation/AdePTNavigator.h>
#include <AdePT/base/MParray.h>
#include <AdePT/kernels/electrons.cuh>
// #include <AdePT/kernels/gammas.cuh>


#include <AdePT/kernels/gammas_experimental.cuh>

#ifndef USE_SPLIT_KERNELS
#include <AdePT/kernels/electrons.cuh>
#include <AdePT/kernels/gammas.cuh>
#else
#include <AdePT/kernels/electrons_split.cuh>
#include <AdePT/kernels/gammas_split.cuh>
#endif

#include <VecGeom/base/Config.h>
#ifdef VECGEOM_ENABLE_CUDA
Expand All @@ -34,6 +36,8 @@
#include <G4HepEmStateInit.hh>
#include <G4HepEmParameters.hh>
#include <G4HepEmMatCutData.hh>
#include <G4HepEmElectronTrack.hh>
#include <G4HepEmGammaTrack.hh>

#include <iostream>
#include <iomanip>
Expand Down Expand Up @@ -263,6 +267,16 @@ GPUstate *InitializeGPU(adeptint::TrackBuffer &buffer, int capacity, int maxbatc
COPCORE_CUDA_CHECK(cudaStreamCreate(&gpuState.particles[i].stream));
COPCORE_CUDA_CHECK(cudaEventCreate(&gpuState.particles[i].event));
}

#ifdef USE_SPLIT_KERNELS
// Init HepEM tracks
// e+ / e-
COPCORE_CUDA_CHECK(cudaMalloc(&gpuState.hepEMBuffers_d.electronsHepEm, capacity * sizeof(G4HepEmElectronTrack)));
COPCORE_CUDA_CHECK(cudaMalloc(&gpuState.hepEMBuffers_d.positronsHepEm, capacity * sizeof(G4HepEmElectronTrack)));
// Gammas
COPCORE_CUDA_CHECK(cudaMalloc(&gpuState.hepEMBuffers_d.gammasHepEm, capacity * sizeof(G4HepEmGammaTrack)));
#endif

InitLeakedQueues<<<1, 1, 0, gpuState.stream>>>(gpuState.allmgr_d, kQueueSize);
COPCORE_CUDA_CHECK(cudaDeviceSynchronize());

Expand All @@ -289,6 +303,12 @@ void FreeGPU(GPUstate &gpuState, G4HepEmState *g4hepem_state)
COPCORE_CUDA_CHECK(cudaFreeHost(gpuState.stats));
COPCORE_CUDA_CHECK(cudaFree(gpuState.toDevice_dev));

#ifdef USE_SPLIT_KERNELS
COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.electronsHepEm));
COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.positronsHepEm));
COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.gammasHepEm));
#endif

COPCORE_CUDA_CHECK(cudaStreamDestroy(gpuState.stream));

for (int i = 0; i < ParticleType::NumParticleTypes; i++) {
Expand Down Expand Up @@ -381,10 +401,22 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
transportBlocks = (numElectrons + TransportThreads - 1) / TransportThreads;
transportBlocks = std::min(transportBlocks, MaxBlocks);
#endif
#ifndef USE_SPLIT_KERNELS
TransportElectrons<AdeptScoring><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, secondaries, electrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);

#else
ElectronHowFar<true><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm, VolAuxArray::GetInstance().fAuxData_dev);
ElectronPropagation<true><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm);
ElectronMSC<true><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm);
ElectronRelocation<true><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(electrons.trackmgr);
ElectronInteractions<true, AdeptScoring><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm, secondaries, electrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);
#endif
COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, electrons.stream));
COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, electrons.event, 0));
}
Expand All @@ -396,10 +428,22 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
transportBlocks = (numPositrons + TransportThreads - 1) / TransportThreads;
transportBlocks = std::min(transportBlocks, MaxBlocks);
#endif
#ifndef USE_SPLIT_KERNELS
TransportPositrons<AdeptScoring><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, secondaries, positrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);

#else
ElectronHowFar<false><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm, VolAuxArray::GetInstance().fAuxData_dev);
ElectronPropagation<false><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm);
ElectronMSC<false><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm);
ElectronRelocation<false><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(positrons.trackmgr);
ElectronInteractions<false, AdeptScoring><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm, secondaries, positrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);
#endif
COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream));
COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, positrons.event, 0));
}
Expand All @@ -411,22 +455,20 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
transportBlocks = (numGammas + TransportThreads - 1) / TransportThreads;
transportBlocks = std::min(transportBlocks, MaxBlocks);
#endif
Physics1<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, VolAuxArray::GetInstance().fAuxData_dev
);
Transport1<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, VolAuxArray::GetInstance().fAuxData_dev
);
Relocation<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, gammas.leakedTracks, VolAuxArray::GetInstance().fAuxData_dev
);
Physics2<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, secondaries, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev
);

// TransportGammas<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
// gammas.trackmgr, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev);

#ifndef USE_SPLIT_KERNELS
TransportGammas<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev);
#else
GammaHowFar<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, gpuState.hepEMBuffers_d.gammasHepEm, VolAuxArray::GetInstance().fAuxData_dev);
GammaPropagation<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, gpuState.hepEMBuffers_d.gammasHepEm, VolAuxArray::GetInstance().fAuxData_dev);
GammaRelocation<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(gammas.trackmgr, gammas.leakedTracks,
VolAuxArray::GetInstance().fAuxData_dev);
GammaInteractions<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, gpuState.hepEMBuffers_d.gammasHepEm, secondaries, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);
#endif
COPCORE_CUDA_CHECK(cudaEventRecord(gammas.event, gammas.stream));
COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, gammas.event, 0));
}
Expand Down
16 changes: 16 additions & 0 deletions include/AdePT/core/AdePTTransportStruct.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
#include <G4HepEmParameters.hh>
#include <G4HepEmRandomEngine.hh>

#ifdef USE_SPLIT_KERNELS
#include <G4HepEmElectronTrack.hh>
#include <G4HepEmGammaTrack.hh>
#endif

#ifdef __CUDA_ARCH__
// Define inline implementations of the RNG methods for the device.
// (nvcc ignores the __device__ attribute in definitions, so this is only to
Expand Down Expand Up @@ -65,6 +70,14 @@ struct AllTrackManagers {
MParrayTracks *leakedTracks[ParticleType::NumParticleTypes];
};

#ifdef USE_SPLIT_KERNELS
struct HepEmBuffers {
G4HepEmElectronTrack *electronsHepEm;
G4HepEmElectronTrack *positronsHepEm;
G4HepEmGammaTrack *gammasHepEm;
};
#endif

// A data structure to transfer statistics after each iteration.
struct Stats {
adept::TrackManager<Track>::Stats mgr_stats[ParticleType::NumParticleTypes];
Expand All @@ -78,6 +91,9 @@ struct GPUstate {
ParticleType particles[ParticleType::NumParticleTypes];
AllTrackManagers allmgr_h; ///< Host pointers for track managers
AllTrackManagers allmgr_d; ///< Device pointers for track managers
#ifdef USE_SPLIT_KERNELS
HepEmBuffers hepEMBuffers_d;
#endif
// Create a stream to synchronize kernels of all particle types.
cudaStream_t stream; ///< all-particle sync stream
TrackData *toDevice_dev{nullptr}; ///< toDevice buffer of tracks
Expand Down
27 changes: 17 additions & 10 deletions include/AdePT/core/Track.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <AdePT/copcore/SystemOfUnits.h>
#include <AdePT/copcore/Ranluxpp.h>

#include <G4HepEmRandomEngine.hh>

#include <VecGeom/base/Vector3D.h>
#include <VecGeom/navigation/NavigationState.h>

Expand All @@ -17,11 +19,9 @@ struct Track {
using Precision = vecgeom::Precision;

int parentID{0}; // Stores the track id of the initial particle given to AdePT
int hitsurfID{0};

RanluxppDouble rngState;
double eKin;
double preStepEKin;
double numIALeft[3];
double initialRange;
double dynamicRangeFactor;
Expand All @@ -32,22 +32,29 @@ struct Track {
double properTime{0};

vecgeom::Vector3D<Precision> pos;
vecgeom::Vector3D<Precision> preStepPos;
vecgeom::Vector3D<Precision> dir;
vecgeom::Vector3D<Precision> preStepDir;
vecgeom::NavigationState navState;

#ifdef USE_SPLIT_KERNELS
RanluxppDouble newRNG;

// Variables used to store track info needed for scoring
double preStepEKin;
vecgeom::Vector3D<Precision> preStepPos;
vecgeom::Vector3D<Precision> preStepDir;
vecgeom::NavigationState nextState;
vecgeom::NavigationState preStepNavState;

// Variables used to store navigation results
bool reachedInteractionPoint{false};
long hitsurfID{0};
bool propagated{false};
double geometryStepLength{0};
double safety{0};

// Variables used to store physics results from G4HepEM
double geometricalStepLengthFromPhysics{0};
int winnerProcessIndex{0};
double preStepMFPs[3];
double PEmxSec{0};
// Variables used to store results from G4HepEM
bool restrictedPhysicalStepLength{false};
bool stopped{false};
#endif

__host__ __device__ double Uniform() { return rngState.Rndm(); }

Expand Down
Loading

0 comments on commit fa8ba18

Please sign in to comment.