diff --git a/examples/Example12/CMakeLists.txt b/examples/Example12/CMakeLists.txt new file mode 100644 index 00000000..1c4206bd --- /dev/null +++ b/examples/Example12/CMakeLists.txt @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: 2021 CERN +# SPDX-License-Identifier: Apache-2.0 + +option(BUILTIN_G4VECGEOMNAV "Build G4VecGeomNav from source (requires VecGeom)" ON) + +if(NOT TARGET G4HepEm::g4HepEm) + message(STATUS "Disabling example12 (needs G4HepEm)") + return() +endif() + +# Start Configuration of G4VecGeomNav - converter from G4 to VecGeom volumes +# ----- ------------- ------------ +# Minimum version of G4VecGeomNav we need. +set(G4VecGeomNav_VERSION "") # 2021.06.18") # 0.1") + +if(BUILTIN_G4VECGEOMNAV) + include(FetchContent) + + FetchContent_Declare( + g4vecgeomnav + GIT_REPOSITORY https://gitlab.cern.ch/VecGeom/g4vecgeomnav.git + GIT_TAG 1a002fbf2bccd81827b6100f86e2ad82433a75e9 + ) + + FetchContent_GetProperties(g4vecgeomnav) + + if(NOT g4vecgeomnav_POPULATED) + FetchContent_Populate(g4vecgeomnav) + message(STATUS "${g4vecgeomnav_SOURCE_DIR}, ${g4vecgeomnav_BINARY_DIR}") + add_subdirectory(${g4vecgeomnav_SOURCE_DIR} ${g4vecgeomnav_BINARY_DIR}) + endif() +else() + find_package(G4VecGeomNav REQUIRED) +endif() +# End configuration of G4VecGeomNav +# --- ------------- ------------ + +add_executable(example12 example12.cpp example12.cu electrons.cu gammas.cu) +target_link_libraries(example12 + PRIVATE + AdePT CopCore::CopCore + G4VecGeomNav::g4vecgeomnav + VecGeom::vecgeom VecGeom::vecgeomcuda_static VecGeom::vgdml + ${Geant4_LIBRARIES} + G4HepEm::g4HepEmData G4HepEm::g4HepEmInit G4HepEm::g4HepEmRun) +set_target_properties(example12 PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON) + +add_test(NAME example12 COMMAND example12 -gdml_name "${TESTING_GDML}") diff --git a/examples/Example12/README.md b/examples/Example12/README.md new file mode 100644 index 00000000..91c3e8ff --- /dev/null +++ b/examples/Example12/README.md @@ -0,0 +1,50 @@ + + +## Example 11 + +Demonstrator of particle transportation on GPUs, with: + + * geometry via VecGeom and a magnetic field with constant Bz, + * physics processes for e-/e+ and gammas using G4HepEm. + * accelerated navigation using newly added BVH class in VecGeom + +Electrons, positrons, and gammas are stored in separate containers in device memory. +Free positions in the storage are handed out with monotonic slot numbers, slots are not reused. +Active tracks are passed via three queues per particle type (see `struct ParticleQueues`). +Results are reproducible using one RANLUX++ state per track. + +### Kernels + +This example uses one stream per particle type to launch kernels asynchronously. +They are synchronized via a forth stream using CUDA events. + +#### `TransportElectrons` + +1. Determine physics step limit. +2. Call magnetic field to get geometry step length. +3. Apply continuous effects; kill track if stopped. +4. If the particle reaches a boundary, perform relocation. +5. If not, and if there is a discrete process: + 1. Sample the final state. + 2. Update the primary and produce secondaries. + +#### `TransportGammas` + +1. Determine the physics step limit. +2. Query VecGeom to get geometry step length (no magnetic field for neutral particles!). +3. If the particle reaches a boundary, perform relocation. +4. If not, and if there is a discrete process: + 1. Sample the final state. + 2. Update the primary and produce secondaries. + +#### `FinishIteration` + +Clear the queues and return the tracks in flight. +This kernel runs after all secondary particles were produced. + +#### `InitPrimaries` and `InitParticleQueues` + +Used to initialize multiple primary particles with separate seeds. diff --git a/examples/Example12/electrons.cu b/examples/Example12/electrons.cu new file mode 100644 index 00000000..818d9442 --- /dev/null +++ b/examples/Example12/electrons.cu @@ -0,0 +1,263 @@ +// SPDX-FileCopyrightText: 2021 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example11.cuh" + +#include + +#include +#include + +#include +#include +#include +#include +#include +// Pull in implementation. +#include +#include +#include +#include +#include +#include + +// Compute the physics and geometry step limit, transport the electrons while +// applying the continuous effects and maybe a discrete process that could +// generate secondaries. +template +static __device__ __forceinline__ void TransportElectrons(Track *electrons, const adept::MParray *active, + Secondaries &secondaries, adept::MParray *activeQueue, + GlobalScoring *scoring) +{ + constexpr int Charge = IsElectron ? -1 : 1; + constexpr double Mass = copcore::units::kElectronMassC2; + fieldPropagatorConstBz fieldPropagatorBz(BzFieldValue); + + int activeSize = active->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*active)[i]; + Track ¤tTrack = electrons[slot]; + + // Init a track with the needed data to call into G4HepEm. + G4HepEmElectronTrack elTrack; + G4HepEmTrack *theTrack = elTrack.GetTrack(); + theTrack->SetEKin(currentTrack.energy); + + // Fetch the material cuts-couple index from the G4 track + auto cur_state= track->currentState(); + // auto cur_nav_index= cur_state.GetNavIndex(cur_state.GetLevel()); + VPlacedVolume const * placedVol = cur_state.Top(); + LogicalVolume const * logicalVol = placedVol->GetLogicalVolume(); + int theMCIndex = (int) logicalVol->GetMaterialCutsPtr(); // It's not the pointer, it's an index !! + std::cout << " Volume (log): " << logicalVol->GetName() << " mat-cuts-cpl = " << theMCindex << "\n"; + + theTrack->SetMCIndex(theMCIndex); + + theTrack->SetCharge(Charge); + + // Sample the `number-of-interaction-left` and put it into the track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = currentTrack.numIALeft[ip]; + if (numIALeft <= 0) { + numIALeft = -std::log(currentTrack.Uniform()); + currentTrack.numIALeft[ip] = numIALeft; + } + theTrack->SetNumIALeft(numIALeft, ip); + } + + // Call G4HepEm to compute the physics step limit. + G4HepEmElectronManager::HowFar(&g4HepEmData, &g4HepEmPars, &elTrack); + + // Get result into variables. + double geometricalStepLengthFromPhysics = theTrack->GetGStepLength(); + // The phyiscal step length is the amount that the particle experiences + // which might be longer than the geometrical step length due to MSC. As + // long as we call PerformContinuous in the same kernel we don't need to + // care, but we need to make this available when splitting the operations. + // double physicalStepLength = elTrack.GetPStepLength(); + int winnerProcessIndex = theTrack->GetWinnerProcessIndex(); + // Leave the range and MFP inside the G4HepEmTrack. If we split kernels, we + // also need to carry them over! + + // Check if there's a volume boundary in between. + vecgeom::NavStateIndex nextState; + double geometryStepLength = fieldPropagatorBz.ComputeStepAndPropagatedState( + currentTrack.energy, Mass, Charge, geometricalStepLengthFromPhysics, currentTrack.pos, currentTrack.dir, + currentTrack.navState, nextState); + + if (nextState.IsOnBoundary()) { + theTrack->SetGStepLength(geometryStepLength); + theTrack->SetOnBoundary(true); + } + + // Apply continuous effects. + bool stopped = G4HepEmElectronManager::PerformContinuous(&g4HepEmData, &g4HepEmPars, &elTrack); + // Collect the changes. + currentTrack.energy = theTrack->GetEKin(); + atomicAdd(&scoring->energyDeposit, theTrack->GetEnergyDeposit()); + + // Save the `number-of-interaction-left` in our track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = theTrack->GetNumIALeft(ip); + currentTrack.numIALeft[ip] = numIALeft; + } + + if (stopped) { + if (!IsElectron) { + // Annihilate the stopped positron into two gammas heading to opposite + // directions (isotropic). + Track &gamma1 = secondaries.gammas.NextTrack(); + Track &gamma2 = secondaries.gammas.NextTrack(); + atomicAdd(&scoring->secondaries, 2); + + const double cost = 2 * currentTrack.Uniform() - 1; + const double sint = sqrt(1 - cost * cost); + const double phi = k2Pi * currentTrack.Uniform(); + double sinPhi, cosPhi; + sincos(phi, &sinPhi, &cosPhi); + + gamma1.InitAsSecondary(/*parent=*/currentTrack); + gamma1.rngState = currentTrack.rngState.Branch(); + gamma1.energy = copcore::units::kElectronMassC2; + gamma1.dir.Set(sint * cosPhi, sint * sinPhi, cost); + + gamma2.InitAsSecondary(/*parent=*/currentTrack); + // Reuse the RNG state of the dying track. + gamma2.rngState = currentTrack.rngState; + gamma2.energy = copcore::units::kElectronMassC2; + gamma2.dir = -gamma1.dir; + } + // Particles are killed by not enqueuing them into the new activeQueue. + continue; + } + + if (nextState.IsOnBoundary()) { + // For now, just count that we hit something. + atomicAdd(&scoring->hits, 1); + + // Kill the particle if it left the world. + if (nextState.Top() != nullptr) { + activeQueue->push_back(slot); + + // Move to the next boundary. + BVHNavigator::RelocateToNextVolume(currentTrack.pos, currentTrack.dir, nextState); + currentTrack.navState = nextState; + } + continue; + } else if (winnerProcessIndex < 0) { + // No discrete process, move on. + activeQueue->push_back(slot); + continue; + } + + // Reset number of interaction left for the winner discrete process. + // (Will be resampled in the next iteration.) + currentTrack.numIALeft[winnerProcessIndex] = -1.0; + + // Check if a delta interaction happens instead of the real discrete process. + if (G4HepEmElectronManager::CheckDelta(&g4HepEmData, theTrack, currentTrack.Uniform())) { + // A delta interaction happened, move on. + activeQueue->push_back(slot); + continue; + } + + // Perform the discrete interaction. + RanluxppDoubleEngine rnge(¤tTrack.rngState); + // We will need one branched RNG state, prepare while threads are synchronized. + RanluxppDouble newRNG(currentTrack.rngState.Branch()); + + const double energy = currentTrack.energy; + const double theElCut = g4HepEmData.fTheMatCutData->fMatCutData[theMCIndex].fSecElProdCutE; + + switch (winnerProcessIndex) { + case 0: { + // Invoke ionization (for e-/e+): + double deltaEkin = (IsElectron) ? G4HepEmElectronInteractionIoni::SampleETransferMoller(theElCut, energy, &rnge) + : G4HepEmElectronInteractionIoni::SampleETransferBhabha(theElCut, energy, &rnge); + + double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirSecondary[3]; + G4HepEmElectronInteractionIoni::SampleDirections(energy, deltaEkin, dirSecondary, dirPrimary, &rnge); + + Track &secondary = secondaries.electrons.NextTrack(); + atomicAdd(&scoring->secondaries, 1); + + secondary.InitAsSecondary(/*parent=*/currentTrack); + secondary.rngState = newRNG; + secondary.energy = deltaEkin; + secondary.dir.Set(dirSecondary[0], dirSecondary[1], dirSecondary[2]); + + currentTrack.energy = energy - deltaEkin; + currentTrack.dir.Set(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + // The current track continues to live. + activeQueue->push_back(slot); + break; + } + case 1: { + // Invoke model for Bremsstrahlung: either SB- or Rel-Brem. + double logEnergy = std::log(energy); + double deltaEkin = energy < g4HepEmPars.fElectronBremModelLim + ? G4HepEmElectronInteractionBrem::SampleETransferSB(&g4HepEmData, energy, logEnergy, + theMCIndex, &rnge, IsElectron) + : G4HepEmElectronInteractionBrem::SampleETransferRB(&g4HepEmData, energy, logEnergy, + theMCIndex, &rnge, IsElectron); + + double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirSecondary[3]; + G4HepEmElectronInteractionBrem::SampleDirections(energy, deltaEkin, dirSecondary, dirPrimary, &rnge); + + Track &gamma = secondaries.gammas.NextTrack(); + atomicAdd(&scoring->secondaries, 1); + + gamma.InitAsSecondary(/*parent=*/currentTrack); + gamma.rngState = newRNG; + gamma.energy = deltaEkin; + gamma.dir.Set(dirSecondary[0], dirSecondary[1], dirSecondary[2]); + + currentTrack.energy = energy - deltaEkin; + currentTrack.dir.Set(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + // The current track continues to live. + activeQueue->push_back(slot); + break; + } + case 2: { + // Invoke annihilation (in-flight) for e+ + double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double theGamma1Ekin, theGamma2Ekin; + double theGamma1Dir[3], theGamma2Dir[3]; + G4HepEmPositronInteractionAnnihilation::SampleEnergyAndDirectionsInFlight( + energy, dirPrimary, &theGamma1Ekin, theGamma1Dir, &theGamma2Ekin, theGamma2Dir, &rnge); + + Track &gamma1 = secondaries.gammas.NextTrack(); + Track &gamma2 = secondaries.gammas.NextTrack(); + atomicAdd(&scoring->secondaries, 2); + + gamma1.InitAsSecondary(/*parent=*/currentTrack); + gamma1.rngState = newRNG; + gamma1.energy = theGamma1Ekin; + gamma1.dir.Set(theGamma1Dir[0], theGamma1Dir[1], theGamma1Dir[2]); + + gamma2.InitAsSecondary(/*parent=*/currentTrack); + // Reuse the RNG state of the dying track. + gamma2.rngState = currentTrack.rngState; + gamma2.energy = theGamma2Ekin; + gamma2.dir.Set(theGamma2Dir[0], theGamma2Dir[1], theGamma2Dir[2]); + + // The current track is killed by not enqueuing into the next activeQueue. + break; + } + } + } +} + +// Instantiate kernels for electrons and positrons. +__global__ void TransportElectrons(Track *electrons, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *scoring) +{ + TransportElectrons(electrons, active, secondaries, activeQueue, scoring); +} +__global__ void TransportPositrons(Track *positrons, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *scoring) +{ + TransportElectrons(positrons, active, secondaries, activeQueue, scoring); +} diff --git a/examples/Example12/example12.cpp b/examples/Example12/example12.cpp new file mode 100644 index 00000000..6f524617 --- /dev/null +++ b/examples/Example12/example12.cpp @@ -0,0 +1,151 @@ +/x/ SPDX-FileCopyrightText: 2021 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example11.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +// Necessary state ... +// static G4GDMLParser gParser; +static G4VPhysicalVolume* gWorldPhysical = nullptr; + +// Configuration 'parameters' +static G4String fGDMLFileName= "Default.gdml"; // Change it ... +bool cautiousNavigator= true; // Use it (true) for first runs and tests. False otherwise. + +static +G4VPhysicalVolume* ConstructG4geometry() // Replacing G4VUserDetectorConstruction::Construct() +{ + std:cout << "ConstructG4 geometry: reading GDML file : " << fGDMLFileName << "\n"; + G4GDMLParser gdmlParser; + gdmlParser.Read(fGDMLFileName, false); + auto worldPhysical = const_cast(gParser.GetWorldVolume()); + + if (worldPhysical==nullptr) { + G4ExceptionDescription ed; + ed << "World volume not set properly check your setup selection criteria" + << "or GDML input!" << G4endl; + G4Exception( "ConstructG4Geometry() failed", + "GeometryCreationError_01", FatalException, ed ); + + } + return worldPhysical; +} + +static void initGeant4() +{ + // parser.SetOverlapCheck(true); + G4TransportationManager *trMgr = + G4TransportationManager::GetTransportationManager(); + assert(trMgr); + + gWorldPhysical= ConstructG4geometry(); + + G4Navigator *nav = trMgr->GetNavigatorForTracking(); + nav->SetWorldVolume(gWorldPhysical); + + // Configure G4 Navigator - to best report errors (optional) + assert(nav); + if( cautiousNavigator ) { + nav->CheckMode(true); + std::cout << "Enabled Check mode in G4Navigator"; + nav->SetVerboseLevel(1); + std::cout << "Enabled Verbose 1 in G4Navigator"; + } + + // write back + // gParser.Write("out.gdml", gWorldPhysical); + + // Manage the magnetic field (future extension) + // auto fFieldMgr = trMgr->GetFieldManager(); + + // Fix the visibility of the world (for Geant4 11.0-beta ) + gWorldPhysical->GetLogicalVolume()->SetVisAttributes(G4VisAttributes::GetInvisible()); + + // + // --- Create particles that have secondary production threshold. + G4Gamma::Gamma(); + G4Electron::Electron(); + G4Positron::Positron(); + G4Proton::Proton(); + G4ParticleTable *partTable = G4ParticleTable::GetParticleTable(); + partTable->SetReadiness(); + + // + // --- Create production - cuts object and set the secondary production threshold. + G4ProductionCuts *productionCuts = new G4ProductionCuts(); + constexpr G4double ProductionCut = 1 * mm; + productionCuts->SetProductionCut(ProductionCut); + // + // --- Register a region for the world. + G4Region *reg = new G4Region("default"); + reg->AddRootLogicalVolume(worldLog); + reg->UsedInMassGeometry(true); + reg->SetProductionCuts(productionCuts); + // + // --- Update the couple tables. + G4ProductionCutsTable *theCoupleTable = G4ProductionCutsTable::GetProductionCutsTable(); + theCoupleTable->UpdateCoupleTable(world); +} + +void InitBVH() +{ + vecgeom::cxx::BVHManager::Init(); + vecgeom::cxx::BVHManager::DeviceInit(); +} + +int main(int argc, char *argv[]) +{ +#ifndef VECGEOM_USE_NAVINDEX + std::cout << "### VecGeom must be compiled with USE_NAVINDEX support to run this.\n"; + return 2; +#endif + + OPTION_STRING(gdml_name, "trackML.gdml"); + OPTION_INT(cache_depth, 0); // 0 = full depth + OPTION_INT(particles, 1); + OPTION_DOUBLE(energy, 100); // entered in GeV + energy *= copcore::units::GeV; + + InitGeant4(); + + // This will convert the geometry to VecGeom and implicitely also set up + // the VecGeom navigator. + // Q: does it change the G4 Navigator to use VecGeom Navigation ????? JA 07.09.2021 + G4VecGeomConverter::Instance().SetVerbose(1); + G4VecGeomConverter::Instance().ConvertG4Geometry(gWorldPhysical); + G4cout << vecgeom::GeoManager::Instance().getMaxDepth() << "\n"; +#endif + + const vecgeom::VPlacedVolume *world = vecgeom::GeoManager::Instance().GetWorld(); + if (!world) return 4; + + example11(world, particles, energy); +} diff --git a/examples/Example12/example12.cu b/examples/Example12/example12.cu new file mode 100644 index 00000000..7d12f4bd --- /dev/null +++ b/examples/Example12/example12.cu @@ -0,0 +1,360 @@ +// SPDX-FileCopyrightText: 2021 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example11.h" +#include "example11.cuh" + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +__constant__ __device__ struct G4HepEmParameters g4HepEmPars; +__constant__ __device__ struct G4HepEmData g4HepEmData; + +__constant__ __device__ int Zero = 0; + +struct G4HepEmState { + G4HepEmData data; + G4HepEmParameters parameters; +}; + +static G4HepEmState *InitG4HepEm() +{ + G4HepEmState *state = new G4HepEmState; + InitG4HepEmData(&state->data); + InitHepEmParameters(&state->parameters); + + InitMaterialAndCoupleData(&state->data, &state->parameters); + + InitElectronData(&state->data, &state->parameters, true); + InitElectronData(&state->data, &state->parameters, false); + InitGammaData(&state->data, &state->parameters); + + G4HepEmMatCutData *cutData = state->data.fTheMatCutData; + std::cout << "fNumG4MatCuts = " << cutData->fNumG4MatCuts << ", fNumMatCutData = " << cutData->fNumMatCutData + << std::endl; + + // Copy to GPU. + CopyG4HepEmDataToGPU(&state->data); + COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(g4HepEmPars, &state->parameters, sizeof(G4HepEmParameters))); + + // Create G4HepEmData with the device pointers. + G4HepEmData dataOnDevice; + dataOnDevice.fTheMatCutData = state->data.fTheMatCutData_gpu; + dataOnDevice.fTheMaterialData = state->data.fTheMaterialData_gpu; + dataOnDevice.fTheElementData = state->data.fTheElementData_gpu; + dataOnDevice.fTheElectronData = state->data.fTheElectronData_gpu; + dataOnDevice.fThePositronData = state->data.fThePositronData_gpu; + dataOnDevice.fTheSBTableData = state->data.fTheSBTableData_gpu; + dataOnDevice.fTheGammaData = state->data.fTheGammaData_gpu; + // The other pointers should never be used. + dataOnDevice.fTheMatCutData_gpu = nullptr; + dataOnDevice.fTheMaterialData_gpu = nullptr; + dataOnDevice.fTheElementData_gpu = nullptr; + dataOnDevice.fTheElectronData_gpu = nullptr; + dataOnDevice.fThePositronData_gpu = nullptr; + dataOnDevice.fTheSBTableData_gpu = nullptr; + dataOnDevice.fTheGammaData_gpu = nullptr; + + COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(g4HepEmData, &dataOnDevice, sizeof(G4HepEmData))); + + return state; +} + +static void FreeG4HepEm(G4HepEmState *state) +{ + FreeG4HepEmData(&state->data); + delete state; +} + +// A bundle of queues per particle type: +// * Two for active particles, one for the current iteration and the second for the next. +struct ParticleQueues { + adept::MParray *currentlyActive; + adept::MParray *nextActive; + + void SwapActive() { std::swap(currentlyActive, nextActive); } +}; + +struct ParticleType { + Track *tracks; + SlotManager *slotManager; + ParticleQueues queues; + cudaStream_t stream; + cudaEvent_t event; + + enum { + Electron = 0, + Positron = 1, + Gamma = 2, + + NumParticleTypes, + }; +}; + +// A bundle of queues for the three particle types. +struct AllParticleQueues { + ParticleQueues queues[ParticleType::NumParticleTypes]; +}; + +// Kernel to initialize the set of queues per particle type. +__global__ void InitParticleQueues(ParticleQueues queues, size_t Capacity) +{ + adept::MParray::MakeInstanceAt(Capacity, queues.currentlyActive); + adept::MParray::MakeInstanceAt(Capacity, queues.nextActive); +} + +// Kernel function to initialize a set of primary particles. +__global__ void InitPrimaries(ParticleGenerator generator, int particles, double energy, + const vecgeom::VPlacedVolume *world) +{ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particles; i += blockDim.x * gridDim.x) { + Track &track = generator.NextTrack(); + + track.rngState.SetSeed(314159265 * (i + 1)); + track.energy = energy; + track.numIALeft[0] = -1.0; + track.numIALeft[1] = -1.0; + track.numIALeft[2] = -1.0; + + track.pos = {0, 0, 0}; + track.dir = {1.0, 0, 0}; + BVHNavigator::LocatePointIn(world, track.pos, track.navState, true); + } +} + +// A data structure to transfer statistics after each iteration. +struct Stats { + GlobalScoring scoring; + int inFlight[ParticleType::NumParticleTypes]; +}; + +// Finish iteration: clear queues and fill statistics. +__global__ void FinishIteration(AllParticleQueues all, const GlobalScoring *scoring, Stats *stats) +{ + stats->scoring = *scoring; + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + all.queues[i].currentlyActive->clear(); + stats->inFlight[i] = all.queues[i].nextActive->size(); + } +} + +void example11(const vecgeom::cxx::VPlacedVolume *world, int numParticles, double energy) +{ + auto &cudaManager = vecgeom::cxx::CudaManager::Instance(); + cudaManager.LoadGeometry(world); + cudaManager.Synchronize(); + + const vecgeom::cuda::VPlacedVolume *world_dev = cudaManager.world_gpu(); + + InitBVH(); + + G4HepEmState *state = InitG4HepEm(); + + // Capacity of the different containers aka the maximum number of particles. + constexpr int Capacity = 256 * 1024; + + std::cout << "INFO: capacity of containers set to " << Capacity << std::endl; + + // Allocate structures to manage tracks of an implicit type: + // * memory to hold the actual Track elements, + // * objects to manage slots inside the memory, + // * queues of slots to remember active particle, + // * a stream and an event for synchronization of kernels. + constexpr size_t TracksSize = sizeof(Track) * Capacity; + constexpr size_t ManagerSize = sizeof(SlotManager); + const size_t QueueSize = adept::MParray::SizeOfInstance(Capacity); + + ParticleType particles[ParticleType::NumParticleTypes]; + SlotManager slotManagerInit(Capacity); + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].tracks, TracksSize)); + + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].slotManager, ManagerSize)); + COPCORE_CUDA_CHECK(cudaMemcpy(particles[i].slotManager, &slotManagerInit, ManagerSize, cudaMemcpyHostToDevice)); + + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].queues.currentlyActive, QueueSize)); + COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].queues.nextActive, QueueSize)); + InitParticleQueues<<<1, 1>>>(particles[i].queues, Capacity); + + COPCORE_CUDA_CHECK(cudaStreamCreate(&particles[i].stream)); + COPCORE_CUDA_CHECK(cudaEventCreate(&particles[i].event)); + } + COPCORE_CUDA_CHECK(cudaDeviceSynchronize()); + + ParticleType &electrons = particles[ParticleType::Electron]; + ParticleType &positrons = particles[ParticleType::Positron]; + ParticleType &gammas = particles[ParticleType::Gamma]; + + // Create a stream to synchronize kernels of all particle types. + cudaStream_t stream; + COPCORE_CUDA_CHECK(cudaStreamCreate(&stream)); + + // Allocate and initialize scoring and statistics. + GlobalScoring *scoring = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&scoring, sizeof(GlobalScoring))); + COPCORE_CUDA_CHECK(cudaMemset(scoring, 0, sizeof(GlobalScoring))); + + Stats *stats_dev = nullptr; + COPCORE_CUDA_CHECK(cudaMalloc(&stats_dev, sizeof(Stats))); + Stats *stats = nullptr; + COPCORE_CUDA_CHECK(cudaMallocHost(&stats, sizeof(Stats))); + + // Initialize primary particles. + constexpr int InitThreads = 32; + int initBlocks = (numParticles + InitThreads - 1) / InitThreads; + ParticleGenerator electronGenerator(electrons.tracks, electrons.slotManager, electrons.queues.currentlyActive); + InitPrimaries<<>>(electronGenerator, numParticles, energy, world_dev); + COPCORE_CUDA_CHECK(cudaDeviceSynchronize()); + + stats->inFlight[ParticleType::Electron] = numParticles; + stats->inFlight[ParticleType::Positron] = 0; + stats->inFlight[ParticleType::Gamma] = 0; + + std::cout << "INFO: running with field Bz = " << BzFieldValue / copcore::units::tesla << " T"; + std::cout << std::endl; + + constexpr int MaxBlocks = 1024; + constexpr int TransportThreads = 32; + int transportBlocks; + + vecgeom::Stopwatch timer; + timer.Start(); + + int inFlight; + int iterNo = 0, loopingNo = 0; + int previousElectrons = -1, previousPositrons = -1; + + do { + Secondaries secondaries = { + .electrons = {electrons.tracks, electrons.slotManager, electrons.queues.nextActive}, + .positrons = {positrons.tracks, positrons.slotManager, positrons.queues.nextActive}, + .gammas = {gammas.tracks, gammas.slotManager, gammas.queues.nextActive}, + }; + + // *** ELECTRONS *** + int numElectrons = stats->inFlight[ParticleType::Electron]; + if (numElectrons > 0) { + transportBlocks = (numElectrons + TransportThreads - 1) / TransportThreads; + transportBlocks = std::min(transportBlocks, MaxBlocks); + + TransportElectrons<<>>( + electrons.tracks, electrons.queues.currentlyActive, secondaries, electrons.queues.nextActive, scoring); + + COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, electrons.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, electrons.event, 0)); + } + + // *** POSITRONS *** + int numPositrons = stats->inFlight[ParticleType::Positron]; + if (numPositrons > 0) { + transportBlocks = (numPositrons + TransportThreads - 1) / TransportThreads; + transportBlocks = std::min(transportBlocks, MaxBlocks); + + TransportPositrons<<>>( + positrons.tracks, positrons.queues.currentlyActive, secondaries, positrons.queues.nextActive, scoring); + + COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, positrons.event, 0)); + } + + // *** GAMMAS *** + int numGammas = stats->inFlight[ParticleType::Gamma]; + if (numGammas > 0) { + transportBlocks = (numGammas + TransportThreads - 1) / TransportThreads; + transportBlocks = std::min(transportBlocks, MaxBlocks); + + TransportGammas<<>>( + gammas.tracks, gammas.queues.currentlyActive, secondaries, gammas.queues.nextActive, scoring); + + COPCORE_CUDA_CHECK(cudaEventRecord(gammas.event, gammas.stream)); + COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, gammas.event, 0)); + } + + // *** END OF TRANSPORT *** + + // The events ensure synchronization before finishing this iteration and + // copying the Stats back to the host. + AllParticleQueues queues = {{electrons.queues, positrons.queues, gammas.queues}}; + FinishIteration<<<1, 1, 0, stream>>>(queues, scoring, stats_dev); + COPCORE_CUDA_CHECK(cudaMemcpyAsync(stats, stats_dev, sizeof(Stats), cudaMemcpyDeviceToHost, stream)); + + // Finally synchronize all kernels. + COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream)); + + // Count the number of particles in flight. + inFlight = 0; + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + inFlight += stats->inFlight[i]; + } + + // Swap the queues for the next iteration. + electrons.queues.SwapActive(); + positrons.queues.SwapActive(); + gammas.queues.SwapActive(); + + std::cout << std::fixed << std::setprecision(4) << std::setfill(' '); + std::cout << "iter " << std::setw(4) << iterNo << " -- tracks in flight: " << std::setw(5) << inFlight + << " energy deposition: " << std::setw(10) << stats->scoring.energyDeposit / copcore::units::GeV + << " number of secondaries: " << std::setw(5) << stats->scoring.secondaries + << " number of hits: " << std::setw(4) << stats->scoring.hits; + std::cout << std::endl; + + // Check if only charged particles are left that are looping. + numElectrons = stats->inFlight[ParticleType::Electron]; + numPositrons = stats->inFlight[ParticleType::Positron]; + numGammas = stats->inFlight[ParticleType::Gamma]; + if (numElectrons == previousElectrons && numPositrons == previousPositrons && numGammas == 0) { + loopingNo++; + } else { + previousElectrons = numElectrons; + previousPositrons = numPositrons; + loopingNo = 0; + } + + iterNo++; + } while (inFlight > 0 && loopingNo < 20); + + auto time_cpu = timer.Stop(); + std::cout << "Run time: " << time_cpu << "\n"; + + // Free resources. + COPCORE_CUDA_CHECK(cudaFree(scoring)); + COPCORE_CUDA_CHECK(cudaFree(stats_dev)); + COPCORE_CUDA_CHECK(cudaFreeHost(stats)); + + COPCORE_CUDA_CHECK(cudaStreamDestroy(stream)); + + for (int i = 0; i < ParticleType::NumParticleTypes; i++) { + COPCORE_CUDA_CHECK(cudaFree(particles[i].tracks)); + COPCORE_CUDA_CHECK(cudaFree(particles[i].slotManager)); + + COPCORE_CUDA_CHECK(cudaFree(particles[i].queues.currentlyActive)); + COPCORE_CUDA_CHECK(cudaFree(particles[i].queues.nextActive)); + + COPCORE_CUDA_CHECK(cudaStreamDestroy(particles[i].stream)); + COPCORE_CUDA_CHECK(cudaEventDestroy(particles[i].event)); + } + + FreeG4HepEm(state); +} diff --git a/examples/Example12/example12.h b/examples/Example12/example12.h new file mode 100644 index 00000000..a6e7d8fd --- /dev/null +++ b/examples/Example12/example12.h @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2021 CERN +// SPDX-License-Identifier: Apache-2.0 + +#ifndef EXAMPLE9_H +#define EXAMPLE9_H + +#include +#ifdef VECGEOM_ENABLE_CUDA +#include // forward declares vecgeom::cxx::VPlacedVolume +#endif + +// Interface between C++ and CUDA. +void InitBVH(); +void example11(const vecgeom::cxx::VPlacedVolume *world, int numParticles, double energy); + +#endif diff --git a/examples/Example12/gammas.cu b/examples/Example12/gammas.cu new file mode 100644 index 00000000..05fc8027 --- /dev/null +++ b/examples/Example12/gammas.cu @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: 2021 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include "example11.cuh" + +#include +#include + +#include +#include +#include +#include +// Pull in implementation. +#include +#include +#include + +__global__ void TransportGammas(Track *gammas, const adept::MParray *active, Secondaries secondaries, + adept::MParray *activeQueue, GlobalScoring *scoring) +{ + int activeSize = active->size(); + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) { + const int slot = (*active)[i]; + Track ¤tTrack = gammas[slot]; + + // Init a track with the needed data to call into G4HepEm. + G4HepEmTrack emTrack; + emTrack.SetEKin(currentTrack.energy); + // For now, just assume a single material. + int theMCIndex = 1; + emTrack.SetMCIndex(theMCIndex); + + // Sample the `number-of-interaction-left` and put it into the track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = currentTrack.numIALeft[ip]; + if (numIALeft <= 0) { + numIALeft = -std::log(currentTrack.Uniform()); + currentTrack.numIALeft[ip] = numIALeft; + } + emTrack.SetNumIALeft(numIALeft, ip); + } + + // Call G4HepEm to compute the physics step limit. + G4HepEmGammaManager::HowFar(&g4HepEmData, &g4HepEmPars, &emTrack); + + // Get result into variables. + double geometricalStepLengthFromPhysics = emTrack.GetGStepLength(); + int winnerProcessIndex = emTrack.GetWinnerProcessIndex(); + // Leave the range and MFP inside the G4HepEmTrack. If we split kernels, we + // also need to carry them over! + + // Check if there's a volume boundary in between. + vecgeom::NavStateIndex nextState; + double geometryStepLength = BVHNavigator::ComputeStepAndNextVolume( + currentTrack.pos, currentTrack.dir, geometricalStepLengthFromPhysics, currentTrack.navState, nextState); + currentTrack.pos += geometryStepLength * currentTrack.dir; + + if (nextState.IsOnBoundary()) { + emTrack.SetGStepLength(geometryStepLength); + emTrack.SetOnBoundary(true); + } + + G4HepEmGammaManager::UpdateNumIALeft(&emTrack); + + // Save the `number-of-interaction-left` in our track. + for (int ip = 0; ip < 3; ++ip) { + double numIALeft = emTrack.GetNumIALeft(ip); + currentTrack.numIALeft[ip] = numIALeft; + } + + if (nextState.IsOnBoundary()) { + // For now, just count that we hit something. + atomicAdd(&scoring->hits, 1); + + // Kill the particle if it left the world. + if (nextState.Top() != nullptr) { + activeQueue->push_back(slot); + + // Move to the next boundary. + BVHNavigator::RelocateToNextVolume(currentTrack.pos, currentTrack.dir, nextState); + currentTrack.navState = nextState; + } + continue; + } else if (winnerProcessIndex < 0) { + // No discrete process, move on. + activeQueue->push_back(slot); + continue; + } + + // Reset number of interaction left for the winner discrete process. + // (Will be resampled in the next iteration.) + currentTrack.numIALeft[winnerProcessIndex] = -1.0; + + // Perform the discrete interaction. + RanluxppDoubleEngine rnge(¤tTrack.rngState); + // We might need one branched RNG state, prepare while threads are synchronized. + RanluxppDouble newRNG(currentTrack.rngState.Branch()); + + const double energy = currentTrack.energy; + + switch (winnerProcessIndex) { + case 0: { + // Invoke gamma conversion to e-/e+ pairs, if the energy is above the threshold. + if (energy < 2 * copcore::units::kElectronMassC2) { + activeQueue->push_back(slot); + continue; + } + + double logEnergy = std::log(energy); + double elKinEnergy, posKinEnergy; + G4HepEmGammaInteractionConversion::SampleKinEnergies(&g4HepEmData, energy, logEnergy, theMCIndex, elKinEnergy, + posKinEnergy, &rnge); + + double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirSecondaryEl[3], dirSecondaryPos[3]; + G4HepEmGammaInteractionConversion::SampleDirections(dirPrimary, dirSecondaryEl, dirSecondaryPos, elKinEnergy, + posKinEnergy, &rnge); + + Track &electron = secondaries.electrons.NextTrack(); + Track &positron = secondaries.positrons.NextTrack(); + atomicAdd(&scoring->secondaries, 2); + + electron.InitAsSecondary(/*parent=*/currentTrack); + electron.rngState = newRNG; + electron.energy = elKinEnergy; + electron.dir.Set(dirSecondaryEl[0], dirSecondaryEl[1], dirSecondaryEl[2]); + + positron.InitAsSecondary(/*parent=*/currentTrack); + // Reuse the RNG state of the dying track. + positron.rngState = currentTrack.rngState; + positron.energy = posKinEnergy; + positron.dir.Set(dirSecondaryPos[0], dirSecondaryPos[1], dirSecondaryPos[2]); + + // The current track is killed by not enqueuing into the next activeQueue. + break; + } + case 1: { + // Invoke Compton scattering of gamma. + constexpr double LowEnergyThreshold = 100 * copcore::units::eV; + if (energy < LowEnergyThreshold) { + activeQueue->push_back(slot); + continue; + } + const double origDirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()}; + double dirPrimary[3]; + const double newEnergyGamma = + G4HepEmGammaInteractionCompton::SamplePhotonEnergyAndDirection(energy, dirPrimary, origDirPrimary, &rnge); + vecgeom::Vector3D newDirGamma(dirPrimary[0], dirPrimary[1], dirPrimary[2]); + + const double energyEl = energy - newEnergyGamma; + if (energyEl > LowEnergyThreshold) { + // Create a secondary electron and sample/compute directions. + Track &electron = secondaries.electrons.NextTrack(); + atomicAdd(&scoring->secondaries, 1); + + electron.InitAsSecondary(/*parent=*/currentTrack); + electron.rngState = newRNG; + electron.energy = energyEl; + electron.dir = energy * currentTrack.dir - newEnergyGamma * newDirGamma; + electron.dir.Normalize(); + } else { + atomicAdd(&scoring->energyDeposit, energyEl); + } + + // Check the new gamma energy and deposit if below threshold. + if (newEnergyGamma > LowEnergyThreshold) { + currentTrack.energy = newEnergyGamma; + currentTrack.dir = newDirGamma; + + // The current track continues to live. + activeQueue->push_back(slot); + } else { + atomicAdd(&scoring->energyDeposit, newEnergyGamma); + // The current track is killed by not enqueuing into the next activeQueue. + } + break; + } + case 2: { + // Invoke photoelectric process: right now only absorb the gamma. + atomicAdd(&scoring->energyDeposit, energy); + // The current track is killed by not enqueuing into the next activeQueue. + break; + } + } + } +}