diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a4dbc28..18418eed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,7 +139,7 @@ add_compile_options("$<$,$>: # - For Debug, generate full debug information - this completely disables optimizations! add_compile_options("$<$,$>:--device-debug>") # - For both, interleave the source in PTX to enhance the debugging experience. -add_compile_options("$<$,$,$>>:--source-in-ptx>") +add_compile_options("$<$,$,$>>:-G>") # Disable warnings from the CUDA frontend about unknown GCC pragmas - let the compiler decide what it likes. add_compile_options("$<$:-Xcudafe;--diag_suppress=unrecognized_gcc_pragma>") @@ -153,6 +153,8 @@ if(NOT WITH_FLUCT) add_compile_definitions(NOFLUCTUATION) endif() +# string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v") + #----------------------------------------------------------------------------# # Build Targets #----------------------------------------------------------------------------# diff --git a/examples/Example1/CMakeLists.txt b/examples/Example1/CMakeLists.txt index 595d455f..adc85a54 100644 --- a/examples/Example1/CMakeLists.txt +++ b/examples/Example1/CMakeLists.txt @@ -74,6 +74,8 @@ configure_file("macros/example1_ttbar_noadept.mac.in" "${PROJECT_BINARY_DIR}/exa # Tests +string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v") + add_test(NAME example1 COMMAND $ -m ${PROJECT_BINARY_DIR}/example1_large_stack.mac ) \ No newline at end of file diff --git a/examples/Example1/macros/example1_ttbar_LHCb.mac.in b/examples/Example1/macros/example1_ttbar_LHCb.mac.in index bfbf963f..3c1497a2 100644 --- a/examples/Example1/macros/example1_ttbar_LHCb.mac.in +++ b/examples/Example1/macros/example1_ttbar_LHCb.mac.in @@ -7,7 +7,7 @@ ## Geant4 macro for modelling simplified sampling calorimeters ## ============================================================================= ## -/run/numberOfThreads 1 +/run/numberOfThreads 8 /control/verbose 0 /run/verbose 0 /process/verbose 0 @@ -26,7 +26,7 @@ /adept/setMillionsOfTrackSlots 8 /adept/setMillionsOfHitSlots 1 ## Device stack limit -# /adept/setCUDAStackLimit 4096 +/adept/setCUDAStackLimit 4096 # If true, particles are transported on the GPU across the whole geometry, GPU regions are ignored /adept/setTrackInAllRegions true diff --git a/include/AdePT/core/AdePTTransport.cuh b/include/AdePT/core/AdePTTransport.cuh index cbc76a98..3318fe29 100644 --- a/include/AdePT/core/AdePTTransport.cuh +++ b/include/AdePT/core/AdePTTransport.cuh @@ -10,7 +10,11 @@ #include #include #include -#include +// #include + + +#include + #include #ifdef VECGEOM_ENABLE_CUDA @@ -407,8 +411,21 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer & transportBlocks = (numGammas + TransportThreads - 1) / TransportThreads; transportBlocks = std::min(transportBlocks, MaxBlocks); #endif - TransportGammas<<>>( - gammas.trackmgr, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev); + Physics1<<>>( + gammas.trackmgr, VolAuxArray::GetInstance().fAuxData_dev + ); + Transport1<<>>( + gammas.trackmgr, VolAuxArray::GetInstance().fAuxData_dev + ); + Relocation<<>>( + gammas.trackmgr, gammas.leakedTracks, VolAuxArray::GetInstance().fAuxData_dev + ); + Physics2<<>>( + gammas.trackmgr, secondaries, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev + ); + + // TransportGammas<<>>( + // gammas.trackmgr, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev); COPCORE_CUDA_CHECK(cudaEventRecord(gammas.event, gammas.stream)); COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, gammas.event, 0)); diff --git a/include/AdePT/core/Track.cuh b/include/AdePT/core/Track.cuh index 2dd14ea3..49a9251d 100644 --- a/include/AdePT/core/Track.cuh +++ b/include/AdePT/core/Track.cuh @@ -17,9 +17,11 @@ struct Track { using Precision = vecgeom::Precision; int parentID{0}; // Stores the track id of the initial particle given to AdePT + int hitsurfID{0}; RanluxppDouble rngState; double eKin; + double preStepEKin; double numIALeft[3]; double initialRange; double dynamicRangeFactor; @@ -30,8 +32,22 @@ struct Track { double properTime{0}; vecgeom::Vector3D pos; + vecgeom::Vector3D preStepPos; vecgeom::Vector3D dir; + vecgeom::Vector3D preStepDir; vecgeom::NavigationState navState; + vecgeom::NavigationState nextState; + vecgeom::NavigationState preStepNavState; + + // Variables used to store navigation results + bool reachedInteractionPoint{false}; + double geometryStepLength{0}; + + // Variables used to store physics results from G4HepEM + double geometricalStepLengthFromPhysics{0}; + int winnerProcessIndex{0}; + double preStepMFPs[3]; + double PEmxSec{0}; __host__ __device__ double Uniform() { return rngState.Rndm(); } diff --git a/include/AdePT/kernels/gammas_experimental.cuh b/include/AdePT/kernels/gammas_experimental.cuh new file mode 100644 index 00000000..e89f6697 --- /dev/null +++ b/include/AdePT/kernels/gammas_experimental.cuh @@ -0,0 +1,396 @@ +// SPDX-FileCopyrightText: 2022 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +// Pull in implementation. +#include +#include +#include +#include + +__global__ void Physics1(adept::TrackManager *gammas, VolAuxData const *auxDataArray) +{ + int activeSize = gammas->fActiveTracks->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*gammas->fActiveTracks)[i]; + Track ¤tTrack = (*gammas)[slot]; + // Save current values for scoring + currentTrack.preStepEKin = currentTrack.eKin; + currentTrack.preStepPos = currentTrack.pos; + currentTrack.preStepDir = currentTrack.dir; + currentTrack.preStepNavState = currentTrack.navState; + // the MCC vector is indexed by the logical volume id +#ifndef ADEPT_USE_SURF + int lvolID = currentTrack.navState.Top()->GetLogicalVolume()->id(); +#else + int lvolID = currentTrack.navState.GetLogicalId(); +#endif + VolAuxData const &auxData = auxDataArray[lvolID]; + + // Init a track with the needed data to call into G4HepEm. + G4HepEmGammaTrack gammaTrack; + G4HepEmTrack *theTrack = gammaTrack.GetTrack(); + theTrack->SetEKin(currentTrack.eKin); + theTrack->SetMCIndex(auxData.fMCIndex); + + // Sample the `number-of-interaction-left` and put it into the track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = currentTrack.numIALeft[ip]; + if (numIALeft <= 0) { + numIALeft = -std::log(currentTrack.Uniform()); + } + theTrack->SetNumIALeft(numIALeft, ip); + } + + // Call G4HepEm to compute the physics step limit. + G4HepEmGammaManager::HowFar(&g4HepEmData, &g4HepEmPars, &gammaTrack); + + // Save the info we need from the G4HepEM track + currentTrack.geometricalStepLengthFromPhysics = theTrack->GetGStepLength(); + currentTrack.winnerProcessIndex = theTrack->GetWinnerProcessIndex(); + currentTrack.PEmxSec = gammaTrack.GetPEmxSec(); + currentTrack.preStepMFPs[0] = theTrack->GetMFP(0); + currentTrack.preStepMFPs[1] = theTrack->GetMFP(1); + currentTrack.preStepMFPs[2] = theTrack->GetMFP(2); + } +} + +__global__ void Transport1(adept::TrackManager *gammas, VolAuxData const *auxDataArray) +{ +#ifdef VECGEOM_FLOAT_PRECISION + const Precision kPush = 10 * vecgeom::kTolerance; +#else + const Precision kPush = 0.; +#endif +// constexpr Precision kPushOutRegion = 10 * vecgeom::kTolerance; +// constexpr int Pdg = 22; + int activeSize = gammas->fActiveTracks->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*gammas->fActiveTracks)[i]; + Track ¤tTrack = (*gammas)[slot]; + + // auto survive = [&](bool leak = false) { + // adeptint::TrackData trackdata; + // currentTrack.CopyTo(trackdata, Pdg); + // if (leak) + // leakedQueue->push_back(trackdata); + // else + // gammas->fNextTracks->push_back(slot); + // }; + + // Check if there's a volume boundary in between. + vecgeom::NavigationState nextState; + double geometryStepLength; +#ifdef ADEPT_USE_SURF + long hitsurf_index = -1; + geometryStepLength = AdePTNavigator::ComputeStepAndNextVolume(currentTrack.pos, currentTrack.dir, currentTrack.geometricalStepLengthFromPhysics, currentTrack.navState, + nextState, hitsurf_index, kPush); +#else + geometryStepLength = AdePTNavigator::ComputeStepAndNextVolume(currentTrack.pos, currentTrack.dir, currentTrack.geometricalStepLengthFromPhysics, currentTrack.navState, + nextState, kPush); +#endif + currentTrack.pos += geometryStepLength * currentTrack.dir; + + // Store the actual geometrical step length traveled + currentTrack.geometryStepLength = geometryStepLength; + + // Set boundary state in navState so the next step and secondaries get the + // correct information (navState = nextState only if relocated + // in case of a boundary; see below) + currentTrack.navState.SetBoundaryState(nextState.IsOnBoundary()); + + // Now init a G4HepEM track with the info it needs to update the the number-of-interaction-left + G4HepEmTrack theTrack; + theTrack.SetGStepLength(geometryStepLength); + theTrack.SetOnBoundary(nextState.IsOnBoundary()); + for (int i = 0; i < 3; i++) { + theTrack.SetMFP(currentTrack.preStepMFPs[i], i); + theTrack.SetNumIALeft(currentTrack.numIALeft[i], i); + } + // Update the number-of-interaction-left + G4HepEmGammaManager::UpdateNumIALeft(&theTrack); + + // Save the `number-of-interaction-left` in our track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = theTrack.GetNumIALeft(ip); + currentTrack.numIALeft[ip] = numIALeft; + } + + // Update the flight times of the particle + double deltaTime = theTrack.GetGStepLength() / copcore::units::kCLight; + currentTrack.globalTime += deltaTime; + currentTrack.localTime += deltaTime; + + // Save the hitsurf index and the next state in the track + currentTrack.hitsurfID = hitsurf_index; + currentTrack.nextState = nextState; + } +} + +__global__ void Relocation(adept::TrackManager *gammas, MParrayTracks *leakedQueue, VolAuxData const *auxDataArray) +{ +// #ifdef VECGEOM_FLOAT_PRECISION +// const Precision kPush = 10 * vecgeom::kTolerance; +// #else +// const Precision kPush = 0.; +// #endif + constexpr Precision kPushOutRegion = 10 * vecgeom::kTolerance; + constexpr int Pdg = 22; + int activeSize = gammas->fActiveTracks->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*gammas->fActiveTracks)[i]; + Track ¤tTrack = (*gammas)[slot]; + + auto survive = [&](bool leak = false) { + adeptint::TrackData trackdata; + currentTrack.CopyTo(trackdata, Pdg); + if (leak) + leakedQueue->push_back(trackdata); + else + gammas->fNextTracks->push_back(slot); + }; + + // Last transportation call, in case we are on a boundary we need to relocate to the next volume + // Every track on a boundary will stop being processed here (no interaction) + // For now (experimental kernels) I just mark it in the track, and in the next kernel we just + // return inmediately (but ideally we won't even give those tracks to the kernel) + if (currentTrack.nextState.IsOnBoundary()) { + currentTrack.reachedInteractionPoint = false; + // Kill the particle if it left the world. + if (!currentTrack.nextState.IsOutside()) { +#ifdef ADEPT_USE_SURF + AdePTNavigator::RelocateToNextVolume(currentTrack.pos, currentTrack.dir, currentTrack.hitsurfID, currentTrack.nextState); + if (currentTrack.nextState.IsOutside()) continue; +#else + AdePTNavigator::RelocateToNextVolume(currentTrack.pos, currentTrack.dir, currentTrack.nextState); +#endif + // Move to the next boundary. + currentTrack.navState = currentTrack.nextState; + // Check if the next volume belongs to the GPU region and push it to the appropriate queue +#ifndef ADEPT_USE_SURF + const int nextlvolID = currentTrack.navState.Top()->GetLogicalVolume()->id(); +#else + const int nextlvolID = currentTrack.navState.GetLogicalId(); +#endif + VolAuxData const &nextauxData = auxDataArray[nextlvolID]; + if (nextauxData.fGPUregion > 0) + survive(); + else { + // To be safe, just push a bit the track exiting the GPU region to make sure + // Geant4 does not relocate it again inside the same region + currentTrack.pos += kPushOutRegion * currentTrack.dir; + survive(/*leak*/ true); + } + } + continue; + } else { + currentTrack.reachedInteractionPoint = true; + } + } +} + +template +__global__ void Physics2(adept::TrackManager *gammas, Secondaries secondaries, Scoring *userScoring, + VolAuxData const *auxDataArray) +{ + int activeSize = gammas->fActiveTracks->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*gammas->fActiveTracks)[i]; + Track ¤tTrack = (*gammas)[slot]; + +#ifndef ADEPT_USE_SURF + int lvolID = currentTrack.navState.Top()->GetLogicalVolume()->id(); +#else + int lvolID = currentTrack.navState.GetLogicalId(); +#endif + VolAuxData const &auxData = auxDataArray[lvolID]; + + auto survive = [&]() { gammas->fNextTracks->push_back(slot); }; + + // Temporary solution + if (!currentTrack.reachedInteractionPoint) { + continue; + } + + // Reset number of interaction left for the winner discrete process. + // (Will be resampled in the next iteration.) + currentTrack.numIALeft[currentTrack.winnerProcessIndex] = -1.0; + + // Perform the discrete interaction. + G4HepEmRandomEngine rnge(¤tTrack.rngState); + // We might need one branched RNG state, prepare while threads are synchronized. + RanluxppDouble newRNG(currentTrack.rngState.Branch()); + + switch (currentTrack.winnerProcessIndex) { + case 0: { + // Invoke gamma conversion to e-/e+ pairs, if the energy is above the threshold. + if (currentTrack.eKin < 2 * copcore::units::kElectronMassC2) { + survive(); + continue; + } + + double logEnergy = std::log(currentTrack.eKin); + double elKinEnergy, posKinEnergy; + G4HepEmGammaInteractionConversion::SampleKinEnergies(&g4HepEmData, currentTrack.eKin, logEnergy, auxData.fMCIndex, + elKinEnergy, posKinEnergy, &rnge); + + double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirSecondaryEl[3], dirSecondaryPos[3]; + G4HepEmGammaInteractionConversion::SampleDirections(dirPrimary, dirSecondaryEl, dirSecondaryPos, elKinEnergy, + posKinEnergy, &rnge); + + Track &electron = secondaries.electrons->NextTrack(); + Track &positron = secondaries.positrons->NextTrack(); + + adept_scoring::AccountProduced(userScoring, /*numElectrons*/ 1, /*numPositrons*/ 1, /*numGammas*/ 0); + + electron.InitAsSecondary(currentTrack.pos, currentTrack.navState, currentTrack.globalTime); + electron.parentID = currentTrack.parentID; + electron.rngState = newRNG; + electron.eKin = elKinEnergy; + electron.dir.Set(dirSecondaryEl[0], dirSecondaryEl[1], dirSecondaryEl[2]); + + positron.InitAsSecondary(currentTrack.pos, currentTrack.navState, currentTrack.globalTime); + // Reuse the RNG state of the dying track. + positron.parentID = currentTrack.parentID; + positron.rngState = currentTrack.rngState; + positron.eKin = posKinEnergy; + positron.dir.Set(dirSecondaryPos[0], dirSecondaryPos[1], dirSecondaryPos[2]); + + // The current track is killed by not enqueuing into the next activeQueue. + break; + } + case 1: { + // Invoke Compton scattering of gamma. + constexpr double LowEnergyThreshold = 100 * copcore::units::eV; + if (currentTrack.eKin < LowEnergyThreshold) { + survive(); + continue; + } + const double origDirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirPrimary[3]; + const double newEnergyGamma = G4HepEmGammaInteractionCompton::SamplePhotonEnergyAndDirection( + currentTrack.eKin, dirPrimary, origDirPrimary, &rnge); + vecgeom::Vector3D newDirGamma(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + + const double energyEl = currentTrack.eKin - newEnergyGamma; + if (energyEl > LowEnergyThreshold) { + // Create a secondary electron and sample/compute directions. + Track &electron = secondaries.electrons->NextTrack(); + adept_scoring::AccountProduced(userScoring, /*numElectrons*/ 1, /*numPositrons*/ 0, /*numGammas*/ 0); + + electron.InitAsSecondary(currentTrack.pos, currentTrack.navState, currentTrack.globalTime); + electron.parentID = currentTrack.parentID; + electron.rngState = newRNG; + electron.eKin = energyEl; + electron.dir = currentTrack.eKin * currentTrack.dir - newEnergyGamma * newDirGamma; + electron.dir.Normalize(); + } else { + if (auxData.fSensIndex >= 0) + adept_scoring::RecordHit(userScoring, + currentTrack.parentID, // Track ID + 2, // Particle type + currentTrack.geometryStepLength, // Step length + 0, // Total Edep + ¤tTrack.preStepNavState, // Pre-step point navstate + ¤tTrack.preStepPos, // Pre-step point position + ¤tTrack.preStepDir, // Pre-step point momentum direction + nullptr, // Pre-step point polarization + currentTrack.preStepEKin, // Pre-step point kinetic energy + 0, // Pre-step point charge + ¤tTrack.navState, // Post-step point navstate + ¤tTrack.pos, // Post-step point position + ¤tTrack.dir, // Post-step point momentum direction + nullptr, // Post-step point polarization + newEnergyGamma, // Post-step point kinetic energy + 0); // Post-step point charge + } + + // Check the new gamma energy and deposit if below threshold. + if (newEnergyGamma > LowEnergyThreshold) { + currentTrack.eKin = newEnergyGamma; + currentTrack.dir = newDirGamma; + survive(); + } else { + if (auxData.fSensIndex >= 0) + adept_scoring::RecordHit(userScoring, + currentTrack.parentID, // Track ID + 2, // Particle type + currentTrack.geometryStepLength, // Step length + 0, // Total Edep + ¤tTrack.preStepNavState, // Pre-step point navstate + ¤tTrack.preStepPos, // Pre-step point position + ¤tTrack.preStepDir, // Pre-step point momentum direction + nullptr, // Pre-step point polarization + currentTrack.preStepEKin, // Pre-step point kinetic energy + 0, // Pre-step point charge + ¤tTrack.navState, // Post-step point navstate + ¤tTrack.pos, // Post-step point position + ¤tTrack.dir, // Post-step point momentum direction + nullptr, // Post-step point polarization + newEnergyGamma, // Post-step point kinetic energy + 0); // Post-step point charge + // The current track is killed by not enqueuing into the next activeQueue. + } + break; + } + case 2: { + // Invoke photoelectric process. + const double theLowEnergyThreshold = 1 * copcore::units::eV; + + const double bindingEnergy = G4HepEmGammaInteractionPhotoelectric::SelectElementBindingEnergy( + &g4HepEmData, auxData.fMCIndex, currentTrack.PEmxSec, currentTrack.eKin, &rnge); + + double edep = bindingEnergy; + const double photoElecE = currentTrack.eKin - edep; + if (photoElecE > theLowEnergyThreshold) { + // Create a secondary electron and sample directions. + Track &electron = secondaries.electrons->NextTrack(); + adept_scoring::AccountProduced(userScoring, /*numElectrons*/ 1, /*numPositrons*/ 0, /*numGammas*/ 0); + + double dirGamma[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirPhotoElec[3]; + G4HepEmGammaInteractionPhotoelectric::SamplePhotoElectronDirection(photoElecE, dirGamma, dirPhotoElec, &rnge); + + electron.InitAsSecondary(currentTrack.pos, currentTrack.navState, currentTrack.globalTime); + electron.parentID = currentTrack.parentID; + electron.rngState = newRNG; + electron.eKin = photoElecE; + electron.dir.Set(dirPhotoElec[0], dirPhotoElec[1], dirPhotoElec[2]); + } else { + edep = currentTrack.eKin; + } + if (auxData.fSensIndex >= 0) + adept_scoring::RecordHit(userScoring, + currentTrack.parentID, // Track ID + 2, // Particle type + currentTrack.geometryStepLength, // Step length + edep, // Total Edep + ¤tTrack.preStepNavState, // Pre-step point navstate + ¤tTrack.preStepPos, // Pre-step point position + ¤tTrack.preStepDir, // Pre-step point momentum direction + nullptr, // Pre-step point polarization + currentTrack.preStepEKin, // Pre-step point kinetic energy + 0, // Pre-step point charge + ¤tTrack.navState, // Post-step point navstate + ¤tTrack.pos, // Post-step point position + ¤tTrack.dir, // Post-step point momentum direction + nullptr, // Post-step point polarization + 0, // Post-step point kinetic energy + 0); // Post-step point charge + // The current track is killed by not enqueuing into the next activeQueue. + break; + } + } + } +} \ No newline at end of file