diff --git a/examples/Example12/CMakeLists.txt b/examples/Example12/CMakeLists.txt
new file mode 100644
index 00000000..1c4206bd
--- /dev/null
+++ b/examples/Example12/CMakeLists.txt
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: 2021 CERN
+# SPDX-License-Identifier: Apache-2.0
+
+option(BUILTIN_G4VECGEOMNAV "Build G4VecGeomNav from source (requires VecGeom)" ON)
+
+if(NOT TARGET G4HepEm::g4HepEm)
+  message(STATUS "Disabling example12 (needs G4HepEm)")
+  return()
+endif()
+
+# Start  Configuration of G4VecGeomNav - converter from G4 to VecGeom volumes
+# -----  -------------    ------------
+# Minimum version of G4VecGeomNav we need.
+set(G4VecGeomNav_VERSION "") # 2021.06.18") # 0.1")
+
+if(BUILTIN_G4VECGEOMNAV)
+  include(FetchContent)
+
+  FetchContent_Declare(
+    g4vecgeomnav
+    GIT_REPOSITORY https://gitlab.cern.ch/VecGeom/g4vecgeomnav.git
+    GIT_TAG 1a002fbf2bccd81827b6100f86e2ad82433a75e9
+  )
+
+  FetchContent_GetProperties(g4vecgeomnav)
+
+  if(NOT g4vecgeomnav_POPULATED)
+    FetchContent_Populate(g4vecgeomnav)
+    message(STATUS "${g4vecgeomnav_SOURCE_DIR}, ${g4vecgeomnav_BINARY_DIR}")
+    add_subdirectory(${g4vecgeomnav_SOURCE_DIR} ${g4vecgeomnav_BINARY_DIR})
+  endif()
+else()
+  find_package(G4VecGeomNav REQUIRED)
+endif()
+# End   configuration of G4VecGeomNav 
+# ---   -------------    ------------
+
+add_executable(example12 example12.cpp example12.cu electrons.cu gammas.cu)
+target_link_libraries(example12
+   PRIVATE
+     AdePT CopCore::CopCore
+     G4VecGeomNav::g4vecgeomnav
+     VecGeom::vecgeom VecGeom::vecgeomcuda_static VecGeom::vgdml
+     ${Geant4_LIBRARIES}
+     G4HepEm::g4HepEmData G4HepEm::g4HepEmInit G4HepEm::g4HepEmRun)
+set_target_properties(example12 PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+
+add_test(NAME example12 COMMAND example12 -gdml_name "${TESTING_GDML}")
diff --git a/examples/Example12/README.md b/examples/Example12/README.md
new file mode 100644
index 00000000..91c3e8ff
--- /dev/null
+++ b/examples/Example12/README.md
@@ -0,0 +1,50 @@
+<!--
+SPDX-FileCopyrightText: 2021 CERN
+SPDX-License-Identifier: CC-BY-4.0
+-->
+
+## Example 11
+
+Demonstrator of particle transportation on GPUs, with:
+
+ * geometry via VecGeom and a magnetic field with constant Bz,
+ * physics processes for e-/e+ and gammas using G4HepEm.
+ * accelerated navigation using newly added BVH class in VecGeom
+
+Electrons, positrons, and gammas are stored in separate containers in device memory.
+Free positions in the storage are handed out with monotonic slot numbers, slots are not reused.
+Active tracks are passed via three queues per particle type (see `struct ParticleQueues`).
+Results are reproducible using one RANLUX++ state per track.
+
+### Kernels
+
+This example uses one stream per particle type to launch kernels asynchronously.
+They are synchronized via a forth stream using CUDA events.
+
+#### `TransportElectrons<bool IsElectron>`
+
+1. Determine physics step limit.
+2. Call magnetic field to get geometry step length.
+3. Apply continuous effects; kill track if stopped.
+4. If the particle reaches a boundary, perform relocation.
+5. If not, and if there is a discrete process:
+ 1. Sample the final state.
+ 2. Update the primary and produce secondaries.
+
+#### `TransportGammas`
+
+1. Determine the physics step limit.
+2. Query VecGeom to get geometry step length (no magnetic field for neutral particles!).
+3. If the particle reaches a boundary, perform relocation.
+4. If not, and if there is a discrete process:
+ 1. Sample the final state.
+ 2. Update the primary and produce secondaries.
+
+#### `FinishIteration`
+
+Clear the queues and return the tracks in flight.
+This kernel runs after all secondary particles were produced.
+
+#### `InitPrimaries` and `InitParticleQueues`
+
+Used to initialize multiple primary particles with separate seeds.
diff --git a/examples/Example12/electrons.cu b/examples/Example12/electrons.cu
new file mode 100644
index 00000000..818d9442
--- /dev/null
+++ b/examples/Example12/electrons.cu
@@ -0,0 +1,263 @@
+// SPDX-FileCopyrightText: 2021 CERN
+// SPDX-License-Identifier: Apache-2.0
+
+#include "example11.cuh"
+
+#include <fieldPropagatorConstBz.h>
+
+#include <CopCore/PhysicalConstants.h>
+#include <AdePT/BVHNavigator.h>
+
+#include <G4HepEmElectronManager.hh>
+#include <G4HepEmElectronTrack.hh>
+#include <G4HepEmElectronInteractionBrem.hh>
+#include <G4HepEmElectronInteractionIoni.hh>
+#include <G4HepEmPositronInteractionAnnihilation.hh>
+// Pull in implementation.
+#include <G4HepEmRunUtils.icc>
+#include <G4HepEmInteractionUtils.icc>
+#include <G4HepEmElectronManager.icc>
+#include <G4HepEmElectronInteractionBrem.icc>
+#include <G4HepEmElectronInteractionIoni.icc>
+#include <G4HepEmPositronInteractionAnnihilation.icc>
+
+// Compute the physics and geometry step limit, transport the electrons while
+// applying the continuous effects and maybe a discrete process that could
+// generate secondaries.
+template <bool IsElectron>
+static __device__ __forceinline__ void TransportElectrons(Track *electrons, const adept::MParray *active,
+                                                          Secondaries &secondaries, adept::MParray *activeQueue,
+                                                          GlobalScoring *scoring)
+{
+  constexpr int Charge  = IsElectron ? -1 : 1;
+  constexpr double Mass = copcore::units::kElectronMassC2;
+  fieldPropagatorConstBz fieldPropagatorBz(BzFieldValue);
+
+  int activeSize = active->size();
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) {
+    const int slot      = (*active)[i];
+    Track &currentTrack = electrons[slot];
+
+    // Init a track with the needed data to call into G4HepEm.
+    G4HepEmElectronTrack elTrack;
+    G4HepEmTrack *theTrack = elTrack.GetTrack();
+    theTrack->SetEKin(currentTrack.energy);
+    
+    // Fetch the material cuts-couple index from the G4 track
+    auto cur_state= track->currentState();
+    // auto cur_nav_index= cur_state.GetNavIndex(cur_state.GetLevel());
+    VPlacedVolume const * placedVol = cur_state.Top();
+    LogicalVolume const * logicalVol = placedVol->GetLogicalVolume();
+    int theMCIndex = (int) logicalVol->GetMaterialCutsPtr();  // It's not the pointer, it's an index !!
+    std::cout << " Volume (log): " << logicalVol->GetName() << " mat-cuts-cpl = " << theMCindex << "\n";
+       
+    theTrack->SetMCIndex(theMCIndex);
+
+    theTrack->SetCharge(Charge);
+
+    // Sample the `number-of-interaction-left` and put it into the track.
+    for (int ip = 0; ip < 3; ++ip) {
+      double numIALeft = currentTrack.numIALeft[ip];
+      if (numIALeft <= 0) {
+        numIALeft                  = -std::log(currentTrack.Uniform());
+        currentTrack.numIALeft[ip] = numIALeft;
+      }
+      theTrack->SetNumIALeft(numIALeft, ip);
+    }
+
+    // Call G4HepEm to compute the physics step limit.
+    G4HepEmElectronManager::HowFar(&g4HepEmData, &g4HepEmPars, &elTrack);
+
+    // Get result into variables.
+    double geometricalStepLengthFromPhysics = theTrack->GetGStepLength();
+    // The phyiscal step length is the amount that the particle experiences
+    // which might be longer than the geometrical step length due to MSC. As
+    // long as we call PerformContinuous in the same kernel we don't need to
+    // care, but we need to make this available when splitting the operations.
+    // double physicalStepLength = elTrack.GetPStepLength();
+    int winnerProcessIndex = theTrack->GetWinnerProcessIndex();
+    // Leave the range and MFP inside the G4HepEmTrack. If we split kernels, we
+    // also need to carry them over!
+
+    // Check if there's a volume boundary in between.
+    vecgeom::NavStateIndex nextState;
+    double geometryStepLength = fieldPropagatorBz.ComputeStepAndPropagatedState</*Relocate=*/false, BVHNavigator>(
+        currentTrack.energy, Mass, Charge, geometricalStepLengthFromPhysics, currentTrack.pos, currentTrack.dir,
+        currentTrack.navState, nextState);
+
+    if (nextState.IsOnBoundary()) {
+      theTrack->SetGStepLength(geometryStepLength);
+      theTrack->SetOnBoundary(true);
+    }
+
+    // Apply continuous effects.
+    bool stopped = G4HepEmElectronManager::PerformContinuous(&g4HepEmData, &g4HepEmPars, &elTrack);
+    // Collect the changes.
+    currentTrack.energy = theTrack->GetEKin();
+    atomicAdd(&scoring->energyDeposit, theTrack->GetEnergyDeposit());
+
+    // Save the `number-of-interaction-left` in our track.
+    for (int ip = 0; ip < 3; ++ip) {
+      double numIALeft           = theTrack->GetNumIALeft(ip);
+      currentTrack.numIALeft[ip] = numIALeft;
+    }
+
+    if (stopped) {
+      if (!IsElectron) {
+        // Annihilate the stopped positron into two gammas heading to opposite
+        // directions (isotropic).
+        Track &gamma1 = secondaries.gammas.NextTrack();
+        Track &gamma2 = secondaries.gammas.NextTrack();
+        atomicAdd(&scoring->secondaries, 2);
+
+        const double cost = 2 * currentTrack.Uniform() - 1;
+        const double sint = sqrt(1 - cost * cost);
+        const double phi  = k2Pi * currentTrack.Uniform();
+        double sinPhi, cosPhi;
+        sincos(phi, &sinPhi, &cosPhi);
+
+        gamma1.InitAsSecondary(/*parent=*/currentTrack);
+        gamma1.rngState = currentTrack.rngState.Branch();
+        gamma1.energy = copcore::units::kElectronMassC2;
+        gamma1.dir.Set(sint * cosPhi, sint * sinPhi, cost);
+
+        gamma2.InitAsSecondary(/*parent=*/currentTrack);
+        // Reuse the RNG state of the dying track.
+        gamma2.rngState = currentTrack.rngState;
+        gamma2.energy = copcore::units::kElectronMassC2;
+        gamma2.dir    = -gamma1.dir;
+      }
+      // Particles are killed by not enqueuing them into the new activeQueue.
+      continue;
+    }
+
+    if (nextState.IsOnBoundary()) {
+      // For now, just count that we hit something.
+      atomicAdd(&scoring->hits, 1);
+
+      // Kill the particle if it left the world.
+      if (nextState.Top() != nullptr) {
+        activeQueue->push_back(slot);
+
+        // Move to the next boundary.
+        BVHNavigator::RelocateToNextVolume(currentTrack.pos, currentTrack.dir, nextState);
+        currentTrack.navState = nextState;
+      }
+      continue;
+    } else if (winnerProcessIndex < 0) {
+      // No discrete process, move on.
+      activeQueue->push_back(slot);
+      continue;
+    }
+
+    // Reset number of interaction left for the winner discrete process.
+    // (Will be resampled in the next iteration.)
+    currentTrack.numIALeft[winnerProcessIndex] = -1.0;
+
+    // Check if a delta interaction happens instead of the real discrete process.
+    if (G4HepEmElectronManager::CheckDelta(&g4HepEmData, theTrack, currentTrack.Uniform())) {
+      // A delta interaction happened, move on.
+      activeQueue->push_back(slot);
+      continue;
+    }
+
+    // Perform the discrete interaction.
+    RanluxppDoubleEngine rnge(&currentTrack.rngState);
+    // We will need one branched RNG state, prepare while threads are synchronized.
+    RanluxppDouble newRNG(currentTrack.rngState.Branch());
+
+    const double energy   = currentTrack.energy;
+    const double theElCut = g4HepEmData.fTheMatCutData->fMatCutData[theMCIndex].fSecElProdCutE;
+
+    switch (winnerProcessIndex) {
+    case 0: {
+      // Invoke ionization (for e-/e+):
+      double deltaEkin = (IsElectron) ? G4HepEmElectronInteractionIoni::SampleETransferMoller(theElCut, energy, &rnge)
+                                      : G4HepEmElectronInteractionIoni::SampleETransferBhabha(theElCut, energy, &rnge);
+
+      double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()};
+      double dirSecondary[3];
+      G4HepEmElectronInteractionIoni::SampleDirections(energy, deltaEkin, dirSecondary, dirPrimary, &rnge);
+
+      Track &secondary = secondaries.electrons.NextTrack();
+      atomicAdd(&scoring->secondaries, 1);
+
+      secondary.InitAsSecondary(/*parent=*/currentTrack);
+      secondary.rngState = newRNG;
+      secondary.energy = deltaEkin;
+      secondary.dir.Set(dirSecondary[0], dirSecondary[1], dirSecondary[2]);
+
+      currentTrack.energy = energy - deltaEkin;
+      currentTrack.dir.Set(dirPrimary[0], dirPrimary[1], dirPrimary[2]);
+      // The current track continues to live.
+      activeQueue->push_back(slot);
+      break;
+    }
+    case 1: {
+      // Invoke model for Bremsstrahlung: either SB- or Rel-Brem.
+      double logEnergy = std::log(energy);
+      double deltaEkin = energy < g4HepEmPars.fElectronBremModelLim
+                             ? G4HepEmElectronInteractionBrem::SampleETransferSB(&g4HepEmData, energy, logEnergy,
+                                                                                 theMCIndex, &rnge, IsElectron)
+                             : G4HepEmElectronInteractionBrem::SampleETransferRB(&g4HepEmData, energy, logEnergy,
+                                                                                 theMCIndex, &rnge, IsElectron);
+
+      double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()};
+      double dirSecondary[3];
+      G4HepEmElectronInteractionBrem::SampleDirections(energy, deltaEkin, dirSecondary, dirPrimary, &rnge);
+
+      Track &gamma = secondaries.gammas.NextTrack();
+      atomicAdd(&scoring->secondaries, 1);
+
+      gamma.InitAsSecondary(/*parent=*/currentTrack);
+      gamma.rngState = newRNG;
+      gamma.energy = deltaEkin;
+      gamma.dir.Set(dirSecondary[0], dirSecondary[1], dirSecondary[2]);
+
+      currentTrack.energy = energy - deltaEkin;
+      currentTrack.dir.Set(dirPrimary[0], dirPrimary[1], dirPrimary[2]);
+      // The current track continues to live.
+      activeQueue->push_back(slot);
+      break;
+    }
+    case 2: {
+      // Invoke annihilation (in-flight) for e+
+      double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()};
+      double theGamma1Ekin, theGamma2Ekin;
+      double theGamma1Dir[3], theGamma2Dir[3];
+      G4HepEmPositronInteractionAnnihilation::SampleEnergyAndDirectionsInFlight(
+          energy, dirPrimary, &theGamma1Ekin, theGamma1Dir, &theGamma2Ekin, theGamma2Dir, &rnge);
+
+      Track &gamma1 = secondaries.gammas.NextTrack();
+      Track &gamma2 = secondaries.gammas.NextTrack();
+      atomicAdd(&scoring->secondaries, 2);
+
+      gamma1.InitAsSecondary(/*parent=*/currentTrack);
+      gamma1.rngState = newRNG;
+      gamma1.energy = theGamma1Ekin;
+      gamma1.dir.Set(theGamma1Dir[0], theGamma1Dir[1], theGamma1Dir[2]);
+
+      gamma2.InitAsSecondary(/*parent=*/currentTrack);
+      // Reuse the RNG state of the dying track.
+      gamma2.rngState = currentTrack.rngState;
+      gamma2.energy = theGamma2Ekin;
+      gamma2.dir.Set(theGamma2Dir[0], theGamma2Dir[1], theGamma2Dir[2]);
+
+      // The current track is killed by not enqueuing into the next activeQueue.
+      break;
+    }
+    }
+  }
+}
+
+// Instantiate kernels for electrons and positrons.
+__global__ void TransportElectrons(Track *electrons, const adept::MParray *active, Secondaries secondaries,
+                                   adept::MParray *activeQueue, GlobalScoring *scoring)
+{
+  TransportElectrons</*IsElectron*/ true>(electrons, active, secondaries, activeQueue, scoring);
+}
+__global__ void TransportPositrons(Track *positrons, const adept::MParray *active, Secondaries secondaries,
+                                   adept::MParray *activeQueue, GlobalScoring *scoring)
+{
+  TransportElectrons</*IsElectron*/ false>(positrons, active, secondaries, activeQueue, scoring);
+}
diff --git a/examples/Example12/example12.cpp b/examples/Example12/example12.cpp
new file mode 100644
index 00000000..6f524617
--- /dev/null
+++ b/examples/Example12/example12.cpp
@@ -0,0 +1,151 @@
+/x/ SPDX-FileCopyrightText: 2021 CERN
+// SPDX-License-Identifier: Apache-2.0
+
+#include "example11.h"
+
+#include <AdePT/ArgParser.h>
+#include <CopCore/SystemOfUnits.h>
+
+#include <G4NistManager.hh>
+#include <G4Material.hh>
+
+#include <G4Box.hh>
+#include <G4LogicalVolume.hh>
+#include <G4PVPlacement.hh>
+
+#include <G4ParticleTable.hh>
+#include <G4Electron.hh>
+#include <G4Positron.hh>
+#include <G4Gamma.hh>
+#include <G4Proton.hh>
+
+#include <G4ProductionCuts.hh>
+#include <G4Region.hh>
+#include <G4ProductionCutsTable.hh>
+
+#include <G4SystemOfUnits.hh>
+
+#include <VecGeom/base/Config.h>
+#include <VecGeom/management/BVHManager.h>
+#include <VecGeom/management/GeoManager.h>
+
+#include <G4String.hh>
+#include <G4GDMLParser.hh>
+
+// Necessary state ... 
+// static G4GDMLParser        gParser;
+static G4VPhysicalVolume*  gWorldPhysical = nullptr;
+
+// Configuration 'parameters'
+static G4String fGDMLFileName=     "Default.gdml"; // Change it ... 
+bool            cautiousNavigator= true; //  Use it (true) for first runs and tests.  False otherwise.
+   
+static
+G4VPhysicalVolume* ConstructG4geometry() // Replacing G4VUserDetectorConstruction::Construct()
+{
+  std:cout << "ConstructG4 geometry: reading GDML file : " << fGDMLFileName << "\n";
+  G4GDMLParser gdmlParser;   
+  gdmlParser.Read(fGDMLFileName, false);
+  auto worldPhysical = const_cast<G4VPhysicalVolume *>(gParser.GetWorldVolume());
+
+  if (worldPhysical==nullptr) {
+    G4ExceptionDescription ed;
+    ed << "World volume not set properly check your setup selection criteria" 
+       << "or GDML input!" << G4endl;
+    G4Exception( "ConstructG4Geometry() failed", 
+                "GeometryCreationError_01", FatalException, ed );
+                
+  }
+  return worldPhysical;
+}
+
+static void initGeant4()
+{
+  //  parser.SetOverlapCheck(true);
+  G4TransportationManager *trMgr = 
+     G4TransportationManager::GetTransportationManager();
+  assert(trMgr);
+
+  gWorldPhysical= ConstructG4geometry();
+  
+  G4Navigator *nav = trMgr->GetNavigatorForTracking();
+  nav->SetWorldVolume(gWorldPhysical);
+
+  // Configure G4 Navigator - to best report errors (optional)
+  assert(nav);
+  if( cautiousNavigator ) { 
+     nav->CheckMode(true);
+     std::cout << "Enabled Check mode in G4Navigator";
+     nav->SetVerboseLevel(1);
+     std::cout << "Enabled Verbose 1  in G4Navigator";
+  }
+  
+  // write back
+  // gParser.Write("out.gdml", gWorldPhysical);
+
+  // Manage the magnetic field (future extension)
+  // auto fFieldMgr = trMgr->GetFieldManager();
+
+  // Fix the visibility of the world (for Geant4 11.0-beta )
+  gWorldPhysical->GetLogicalVolume()->SetVisAttributes(G4VisAttributes::GetInvisible());
+
+  //
+  // --- Create particles that have secondary production threshold.
+  G4Gamma::Gamma();
+  G4Electron::Electron();
+  G4Positron::Positron();
+  G4Proton::Proton();
+  G4ParticleTable *partTable = G4ParticleTable::GetParticleTable();
+  partTable->SetReadiness();
+
+  //
+  // --- Create production - cuts object and set the secondary production threshold.
+  G4ProductionCuts *productionCuts = new G4ProductionCuts();
+  constexpr G4double ProductionCut = 1 * mm;
+  productionCuts->SetProductionCut(ProductionCut);
+  //
+  // --- Register a region for the world.
+  G4Region *reg = new G4Region("default");
+  reg->AddRootLogicalVolume(worldLog);
+  reg->UsedInMassGeometry(true);
+  reg->SetProductionCuts(productionCuts);
+  //
+  // --- Update the couple tables.
+  G4ProductionCutsTable *theCoupleTable = G4ProductionCutsTable::GetProductionCutsTable();
+  theCoupleTable->UpdateCoupleTable(world);
+}
+
+void InitBVH()
+{
+  vecgeom::cxx::BVHManager::Init();
+  vecgeom::cxx::BVHManager::DeviceInit();
+}
+
+int main(int argc, char *argv[])
+{
+#ifndef VECGEOM_USE_NAVINDEX
+  std::cout << "### VecGeom must be compiled with USE_NAVINDEX support to run this.\n";
+  return 2;
+#endif
+
+  OPTION_STRING(gdml_name, "trackML.gdml");
+  OPTION_INT(cache_depth, 0); // 0 = full depth
+  OPTION_INT(particles, 1);
+  OPTION_DOUBLE(energy, 100); // entered in GeV
+  energy *= copcore::units::GeV;
+
+  InitGeant4();
+
+  // This will convert the geometry to VecGeom and implicitely also set up
+  //   the VecGeom navigator.
+  //  Q: does it change the G4 Navigator to use VecGeom Navigation ?????  JA 07.09.2021
+  G4VecGeomConverter::Instance().SetVerbose(1);
+  G4VecGeomConverter::Instance().ConvertG4Geometry(gWorldPhysical);
+  G4cout << vecgeom::GeoManager::Instance().getMaxDepth() << "\n";
+#endif
+
+  const vecgeom::VPlacedVolume *world = vecgeom::GeoManager::Instance().GetWorld();
+  if (!world) return 4;
+
+  example11(world, particles, energy);
+}
diff --git a/examples/Example12/example12.cu b/examples/Example12/example12.cu
new file mode 100644
index 00000000..7d12f4bd
--- /dev/null
+++ b/examples/Example12/example12.cu
@@ -0,0 +1,360 @@
+// SPDX-FileCopyrightText: 2021 CERN
+// SPDX-License-Identifier: Apache-2.0
+
+#include "example11.h"
+#include "example11.cuh"
+
+#include <AdePT/Atomic.h>
+#include <AdePT/BVHNavigator.h>
+#include <AdePT/MParray.h>
+
+#include <CopCore/Global.h>
+#include <CopCore/PhysicalConstants.h>
+#include <CopCore/Ranluxpp.h>
+
+#include <VecGeom/base/Config.h>
+#include <VecGeom/base/Stopwatch.h>
+#include <VecGeom/management/BVHManager.h>
+
+#include <G4HepEmData.hh>
+#include <G4HepEmElectronInit.hh>
+#include <G4HepEmGammaInit.hh>
+#include <G4HepEmMatCutData.hh>
+#include <G4HepEmMaterialInit.hh>
+#include <G4HepEmParameters.hh>
+#include <G4HepEmParametersInit.hh>
+
+#include <iostream>
+#include <iomanip>
+#include <stdio.h>
+
+__constant__ __device__ struct G4HepEmParameters g4HepEmPars;
+__constant__ __device__ struct G4HepEmData g4HepEmData;
+
+__constant__ __device__ int Zero = 0;
+
+struct G4HepEmState {
+  G4HepEmData data;
+  G4HepEmParameters parameters;
+};
+
+static G4HepEmState *InitG4HepEm()
+{
+  G4HepEmState *state = new G4HepEmState;
+  InitG4HepEmData(&state->data);
+  InitHepEmParameters(&state->parameters);
+
+  InitMaterialAndCoupleData(&state->data, &state->parameters);
+
+  InitElectronData(&state->data, &state->parameters, true);
+  InitElectronData(&state->data, &state->parameters, false);
+  InitGammaData(&state->data, &state->parameters);
+
+  G4HepEmMatCutData *cutData = state->data.fTheMatCutData;
+  std::cout << "fNumG4MatCuts = " << cutData->fNumG4MatCuts << ", fNumMatCutData = " << cutData->fNumMatCutData
+            << std::endl;
+
+  // Copy to GPU.
+  CopyG4HepEmDataToGPU(&state->data);
+  COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(g4HepEmPars, &state->parameters, sizeof(G4HepEmParameters)));
+
+  // Create G4HepEmData with the device pointers.
+  G4HepEmData dataOnDevice;
+  dataOnDevice.fTheMatCutData   = state->data.fTheMatCutData_gpu;
+  dataOnDevice.fTheMaterialData = state->data.fTheMaterialData_gpu;
+  dataOnDevice.fTheElementData  = state->data.fTheElementData_gpu;
+  dataOnDevice.fTheElectronData = state->data.fTheElectronData_gpu;
+  dataOnDevice.fThePositronData = state->data.fThePositronData_gpu;
+  dataOnDevice.fTheSBTableData  = state->data.fTheSBTableData_gpu;
+  dataOnDevice.fTheGammaData    = state->data.fTheGammaData_gpu;
+  // The other pointers should never be used.
+  dataOnDevice.fTheMatCutData_gpu   = nullptr;
+  dataOnDevice.fTheMaterialData_gpu = nullptr;
+  dataOnDevice.fTheElementData_gpu  = nullptr;
+  dataOnDevice.fTheElectronData_gpu = nullptr;
+  dataOnDevice.fThePositronData_gpu = nullptr;
+  dataOnDevice.fTheSBTableData_gpu  = nullptr;
+  dataOnDevice.fTheGammaData_gpu    = nullptr;
+
+  COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(g4HepEmData, &dataOnDevice, sizeof(G4HepEmData)));
+
+  return state;
+}
+
+static void FreeG4HepEm(G4HepEmState *state)
+{
+  FreeG4HepEmData(&state->data);
+  delete state;
+}
+
+// A bundle of queues per particle type:
+//  * Two for active particles, one for the current iteration and the second for the next.
+struct ParticleQueues {
+  adept::MParray *currentlyActive;
+  adept::MParray *nextActive;
+
+  void SwapActive() { std::swap(currentlyActive, nextActive); }
+};
+
+struct ParticleType {
+  Track *tracks;
+  SlotManager *slotManager;
+  ParticleQueues queues;
+  cudaStream_t stream;
+  cudaEvent_t event;
+
+  enum {
+    Electron = 0,
+    Positron = 1,
+    Gamma    = 2,
+
+    NumParticleTypes,
+  };
+};
+
+// A bundle of queues for the three particle types.
+struct AllParticleQueues {
+  ParticleQueues queues[ParticleType::NumParticleTypes];
+};
+
+// Kernel to initialize the set of queues per particle type.
+__global__ void InitParticleQueues(ParticleQueues queues, size_t Capacity)
+{
+  adept::MParray::MakeInstanceAt(Capacity, queues.currentlyActive);
+  adept::MParray::MakeInstanceAt(Capacity, queues.nextActive);
+}
+
+// Kernel function to initialize a set of primary particles.
+__global__ void InitPrimaries(ParticleGenerator generator, int particles, double energy,
+                              const vecgeom::VPlacedVolume *world)
+{
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particles; i += blockDim.x * gridDim.x) {
+    Track &track = generator.NextTrack();
+
+    track.rngState.SetSeed(314159265 * (i + 1));
+    track.energy       = energy;
+    track.numIALeft[0] = -1.0;
+    track.numIALeft[1] = -1.0;
+    track.numIALeft[2] = -1.0;
+
+    track.pos = {0, 0, 0};
+    track.dir = {1.0, 0, 0};
+    BVHNavigator::LocatePointIn(world, track.pos, track.navState, true);
+  }
+}
+
+// A data structure to transfer statistics after each iteration.
+struct Stats {
+  GlobalScoring scoring;
+  int inFlight[ParticleType::NumParticleTypes];
+};
+
+// Finish iteration: clear queues and fill statistics.
+__global__ void FinishIteration(AllParticleQueues all, const GlobalScoring *scoring, Stats *stats)
+{
+  stats->scoring = *scoring;
+  for (int i = 0; i < ParticleType::NumParticleTypes; i++) {
+    all.queues[i].currentlyActive->clear();
+    stats->inFlight[i] = all.queues[i].nextActive->size();
+  }
+}
+
+void example11(const vecgeom::cxx::VPlacedVolume *world, int numParticles, double energy)
+{
+  auto &cudaManager = vecgeom::cxx::CudaManager::Instance();
+  cudaManager.LoadGeometry(world);
+  cudaManager.Synchronize();
+
+  const vecgeom::cuda::VPlacedVolume *world_dev = cudaManager.world_gpu();
+
+  InitBVH();
+
+  G4HepEmState *state = InitG4HepEm();
+
+  // Capacity of the different containers aka the maximum number of particles.
+  constexpr int Capacity = 256 * 1024;
+
+  std::cout << "INFO: capacity of containers set to " << Capacity << std::endl;
+
+  // Allocate structures to manage tracks of an implicit type:
+  //  * memory to hold the actual Track elements,
+  //  * objects to manage slots inside the memory,
+  //  * queues of slots to remember active particle,
+  //  * a stream and an event for synchronization of kernels.
+  constexpr size_t TracksSize  = sizeof(Track) * Capacity;
+  constexpr size_t ManagerSize = sizeof(SlotManager);
+  const size_t QueueSize       = adept::MParray::SizeOfInstance(Capacity);
+
+  ParticleType particles[ParticleType::NumParticleTypes];
+  SlotManager slotManagerInit(Capacity);
+  for (int i = 0; i < ParticleType::NumParticleTypes; i++) {
+    COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].tracks, TracksSize));
+
+    COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].slotManager, ManagerSize));
+    COPCORE_CUDA_CHECK(cudaMemcpy(particles[i].slotManager, &slotManagerInit, ManagerSize, cudaMemcpyHostToDevice));
+
+    COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].queues.currentlyActive, QueueSize));
+    COPCORE_CUDA_CHECK(cudaMalloc(&particles[i].queues.nextActive, QueueSize));
+    InitParticleQueues<<<1, 1>>>(particles[i].queues, Capacity);
+
+    COPCORE_CUDA_CHECK(cudaStreamCreate(&particles[i].stream));
+    COPCORE_CUDA_CHECK(cudaEventCreate(&particles[i].event));
+  }
+  COPCORE_CUDA_CHECK(cudaDeviceSynchronize());
+
+  ParticleType &electrons = particles[ParticleType::Electron];
+  ParticleType &positrons = particles[ParticleType::Positron];
+  ParticleType &gammas    = particles[ParticleType::Gamma];
+
+  // Create a stream to synchronize kernels of all particle types.
+  cudaStream_t stream;
+  COPCORE_CUDA_CHECK(cudaStreamCreate(&stream));
+
+  // Allocate and initialize scoring and statistics.
+  GlobalScoring *scoring = nullptr;
+  COPCORE_CUDA_CHECK(cudaMalloc(&scoring, sizeof(GlobalScoring)));
+  COPCORE_CUDA_CHECK(cudaMemset(scoring, 0, sizeof(GlobalScoring)));
+
+  Stats *stats_dev = nullptr;
+  COPCORE_CUDA_CHECK(cudaMalloc(&stats_dev, sizeof(Stats)));
+  Stats *stats = nullptr;
+  COPCORE_CUDA_CHECK(cudaMallocHost(&stats, sizeof(Stats)));
+
+  // Initialize primary particles.
+  constexpr int InitThreads = 32;
+  int initBlocks            = (numParticles + InitThreads - 1) / InitThreads;
+  ParticleGenerator electronGenerator(electrons.tracks, electrons.slotManager, electrons.queues.currentlyActive);
+  InitPrimaries<<<initBlocks, InitThreads>>>(electronGenerator, numParticles, energy, world_dev);
+  COPCORE_CUDA_CHECK(cudaDeviceSynchronize());
+
+  stats->inFlight[ParticleType::Electron] = numParticles;
+  stats->inFlight[ParticleType::Positron] = 0;
+  stats->inFlight[ParticleType::Gamma]    = 0;
+
+  std::cout << "INFO: running with field Bz = " << BzFieldValue / copcore::units::tesla << " T";
+  std::cout << std::endl;
+
+  constexpr int MaxBlocks        = 1024;
+  constexpr int TransportThreads = 32;
+  int transportBlocks;
+
+  vecgeom::Stopwatch timer;
+  timer.Start();
+
+  int inFlight;
+  int iterNo = 0, loopingNo = 0;
+  int previousElectrons = -1, previousPositrons = -1;
+
+  do {
+    Secondaries secondaries = {
+        .electrons = {electrons.tracks, electrons.slotManager, electrons.queues.nextActive},
+        .positrons = {positrons.tracks, positrons.slotManager, positrons.queues.nextActive},
+        .gammas    = {gammas.tracks, gammas.slotManager, gammas.queues.nextActive},
+    };
+
+    // *** ELECTRONS ***
+    int numElectrons = stats->inFlight[ParticleType::Electron];
+    if (numElectrons > 0) {
+      transportBlocks = (numElectrons + TransportThreads - 1) / TransportThreads;
+      transportBlocks = std::min(transportBlocks, MaxBlocks);
+
+      TransportElectrons<<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
+          electrons.tracks, electrons.queues.currentlyActive, secondaries, electrons.queues.nextActive, scoring);
+
+      COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, electrons.stream));
+      COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, electrons.event, 0));
+    }
+
+    // *** POSITRONS ***
+    int numPositrons = stats->inFlight[ParticleType::Positron];
+    if (numPositrons > 0) {
+      transportBlocks = (numPositrons + TransportThreads - 1) / TransportThreads;
+      transportBlocks = std::min(transportBlocks, MaxBlocks);
+
+      TransportPositrons<<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
+          positrons.tracks, positrons.queues.currentlyActive, secondaries, positrons.queues.nextActive, scoring);
+
+      COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream));
+      COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, positrons.event, 0));
+    }
+
+    // *** GAMMAS ***
+    int numGammas = stats->inFlight[ParticleType::Gamma];
+    if (numGammas > 0) {
+      transportBlocks = (numGammas + TransportThreads - 1) / TransportThreads;
+      transportBlocks = std::min(transportBlocks, MaxBlocks);
+
+      TransportGammas<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
+          gammas.tracks, gammas.queues.currentlyActive, secondaries, gammas.queues.nextActive, scoring);
+
+      COPCORE_CUDA_CHECK(cudaEventRecord(gammas.event, gammas.stream));
+      COPCORE_CUDA_CHECK(cudaStreamWaitEvent(stream, gammas.event, 0));
+    }
+
+    // *** END OF TRANSPORT ***
+
+    // The events ensure synchronization before finishing this iteration and
+    // copying the Stats back to the host.
+    AllParticleQueues queues = {{electrons.queues, positrons.queues, gammas.queues}};
+    FinishIteration<<<1, 1, 0, stream>>>(queues, scoring, stats_dev);
+    COPCORE_CUDA_CHECK(cudaMemcpyAsync(stats, stats_dev, sizeof(Stats), cudaMemcpyDeviceToHost, stream));
+
+    // Finally synchronize all kernels.
+    COPCORE_CUDA_CHECK(cudaStreamSynchronize(stream));
+
+    // Count the number of particles in flight.
+    inFlight = 0;
+    for (int i = 0; i < ParticleType::NumParticleTypes; i++) {
+      inFlight += stats->inFlight[i];
+    }
+
+    // Swap the queues for the next iteration.
+    electrons.queues.SwapActive();
+    positrons.queues.SwapActive();
+    gammas.queues.SwapActive();
+
+    std::cout << std::fixed << std::setprecision(4) << std::setfill(' ');
+    std::cout << "iter " << std::setw(4) << iterNo << " -- tracks in flight: " << std::setw(5) << inFlight
+              << " energy deposition: " << std::setw(10) << stats->scoring.energyDeposit / copcore::units::GeV
+              << " number of secondaries: " << std::setw(5) << stats->scoring.secondaries
+              << " number of hits: " << std::setw(4) << stats->scoring.hits;
+    std::cout << std::endl;
+
+    // Check if only charged particles are left that are looping.
+    numElectrons = stats->inFlight[ParticleType::Electron];
+    numPositrons = stats->inFlight[ParticleType::Positron];
+    numGammas = stats->inFlight[ParticleType::Gamma];
+    if (numElectrons == previousElectrons && numPositrons == previousPositrons && numGammas == 0) {
+      loopingNo++;
+    } else {
+      previousElectrons = numElectrons;
+      previousPositrons = numPositrons;
+      loopingNo = 0;
+    }
+
+    iterNo++;
+  } while (inFlight > 0 && loopingNo < 20);
+
+  auto time_cpu = timer.Stop();
+  std::cout << "Run time: " << time_cpu << "\n";
+
+  // Free resources.
+  COPCORE_CUDA_CHECK(cudaFree(scoring));
+  COPCORE_CUDA_CHECK(cudaFree(stats_dev));
+  COPCORE_CUDA_CHECK(cudaFreeHost(stats));
+
+  COPCORE_CUDA_CHECK(cudaStreamDestroy(stream));
+
+  for (int i = 0; i < ParticleType::NumParticleTypes; i++) {
+    COPCORE_CUDA_CHECK(cudaFree(particles[i].tracks));
+    COPCORE_CUDA_CHECK(cudaFree(particles[i].slotManager));
+
+    COPCORE_CUDA_CHECK(cudaFree(particles[i].queues.currentlyActive));
+    COPCORE_CUDA_CHECK(cudaFree(particles[i].queues.nextActive));
+
+    COPCORE_CUDA_CHECK(cudaStreamDestroy(particles[i].stream));
+    COPCORE_CUDA_CHECK(cudaEventDestroy(particles[i].event));
+  }
+
+  FreeG4HepEm(state);
+}
diff --git a/examples/Example12/example12.h b/examples/Example12/example12.h
new file mode 100644
index 00000000..a6e7d8fd
--- /dev/null
+++ b/examples/Example12/example12.h
@@ -0,0 +1,16 @@
+// SPDX-FileCopyrightText: 2021 CERN
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef EXAMPLE9_H
+#define EXAMPLE9_H
+
+#include <VecGeom/base/Config.h>
+#ifdef VECGEOM_ENABLE_CUDA
+#include <VecGeom/management/CudaManager.h> // forward declares vecgeom::cxx::VPlacedVolume
+#endif
+
+// Interface between C++ and CUDA.
+void InitBVH();
+void example11(const vecgeom::cxx::VPlacedVolume *world, int numParticles, double energy);
+
+#endif
diff --git a/examples/Example12/gammas.cu b/examples/Example12/gammas.cu
new file mode 100644
index 00000000..05fc8027
--- /dev/null
+++ b/examples/Example12/gammas.cu
@@ -0,0 +1,186 @@
+// SPDX-FileCopyrightText: 2021 CERN
+// SPDX-License-Identifier: Apache-2.0
+
+#include "example11.cuh"
+
+#include <AdePT/BVHNavigator.h>
+#include <CopCore/PhysicalConstants.h>
+
+#include <G4HepEmGammaManager.hh>
+#include <G4HepEmTrack.hh>
+#include <G4HepEmGammaInteractionCompton.hh>
+#include <G4HepEmGammaInteractionConversion.hh>
+// Pull in implementation.
+#include <G4HepEmGammaManager.icc>
+#include <G4HepEmGammaInteractionCompton.icc>
+#include <G4HepEmGammaInteractionConversion.icc>
+
+__global__ void TransportGammas(Track *gammas, const adept::MParray *active, Secondaries secondaries,
+                                adept::MParray *activeQueue, GlobalScoring *scoring)
+{
+  int activeSize = active->size();
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) {
+    const int slot      = (*active)[i];
+    Track &currentTrack = gammas[slot];
+
+    // Init a track with the needed data to call into G4HepEm.
+    G4HepEmTrack emTrack;
+    emTrack.SetEKin(currentTrack.energy);
+    // For now, just assume a single material.
+    int theMCIndex = 1;
+    emTrack.SetMCIndex(theMCIndex);
+
+    // Sample the `number-of-interaction-left` and put it into the track.
+    for (int ip = 0; ip < 3; ++ip) {
+      double numIALeft = currentTrack.numIALeft[ip];
+      if (numIALeft <= 0) {
+        numIALeft                  = -std::log(currentTrack.Uniform());
+        currentTrack.numIALeft[ip] = numIALeft;
+      }
+      emTrack.SetNumIALeft(numIALeft, ip);
+    }
+
+    // Call G4HepEm to compute the physics step limit.
+    G4HepEmGammaManager::HowFar(&g4HepEmData, &g4HepEmPars, &emTrack);
+
+    // Get result into variables.
+    double geometricalStepLengthFromPhysics = emTrack.GetGStepLength();
+    int winnerProcessIndex = emTrack.GetWinnerProcessIndex();
+    // Leave the range and MFP inside the G4HepEmTrack. If we split kernels, we
+    // also need to carry them over!
+
+    // Check if there's a volume boundary in between.
+    vecgeom::NavStateIndex nextState;
+    double geometryStepLength = BVHNavigator::ComputeStepAndNextVolume(
+        currentTrack.pos, currentTrack.dir, geometricalStepLengthFromPhysics, currentTrack.navState, nextState);
+    currentTrack.pos += geometryStepLength * currentTrack.dir;
+
+    if (nextState.IsOnBoundary()) {
+      emTrack.SetGStepLength(geometryStepLength);
+      emTrack.SetOnBoundary(true);
+    }
+
+    G4HepEmGammaManager::UpdateNumIALeft(&emTrack);
+
+    // Save the `number-of-interaction-left` in our track.
+    for (int ip = 0; ip < 3; ++ip) {
+      double numIALeft           = emTrack.GetNumIALeft(ip);
+      currentTrack.numIALeft[ip] = numIALeft;
+    }
+
+    if (nextState.IsOnBoundary()) {
+      // For now, just count that we hit something.
+      atomicAdd(&scoring->hits, 1);
+
+      // Kill the particle if it left the world.
+      if (nextState.Top() != nullptr) {
+        activeQueue->push_back(slot);
+
+        // Move to the next boundary.
+        BVHNavigator::RelocateToNextVolume(currentTrack.pos, currentTrack.dir, nextState);
+        currentTrack.navState = nextState;
+      }
+      continue;
+    } else if (winnerProcessIndex < 0) {
+      // No discrete process, move on.
+      activeQueue->push_back(slot);
+      continue;
+    }
+
+    // Reset number of interaction left for the winner discrete process.
+    // (Will be resampled in the next iteration.)
+    currentTrack.numIALeft[winnerProcessIndex] = -1.0;
+
+    // Perform the discrete interaction.
+    RanluxppDoubleEngine rnge(&currentTrack.rngState);
+    // We might need one branched RNG state, prepare while threads are synchronized.
+    RanluxppDouble newRNG(currentTrack.rngState.Branch());
+
+    const double energy   = currentTrack.energy;
+
+    switch (winnerProcessIndex) {
+    case 0: {
+      // Invoke gamma conversion to e-/e+ pairs, if the energy is above the threshold.
+      if (energy < 2 * copcore::units::kElectronMassC2) {
+        activeQueue->push_back(slot);
+        continue;
+      }
+
+      double logEnergy = std::log(energy);
+      double elKinEnergy, posKinEnergy;
+      G4HepEmGammaInteractionConversion::SampleKinEnergies(&g4HepEmData, energy, logEnergy, theMCIndex, elKinEnergy,
+                                                           posKinEnergy, &rnge);
+
+      double dirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()};
+      double dirSecondaryEl[3], dirSecondaryPos[3];
+      G4HepEmGammaInteractionConversion::SampleDirections(dirPrimary, dirSecondaryEl, dirSecondaryPos, elKinEnergy,
+                                                          posKinEnergy, &rnge);
+
+      Track &electron = secondaries.electrons.NextTrack();
+      Track &positron = secondaries.positrons.NextTrack();
+      atomicAdd(&scoring->secondaries, 2);
+
+      electron.InitAsSecondary(/*parent=*/currentTrack);
+      electron.rngState = newRNG;
+      electron.energy = elKinEnergy;
+      electron.dir.Set(dirSecondaryEl[0], dirSecondaryEl[1], dirSecondaryEl[2]);
+
+      positron.InitAsSecondary(/*parent=*/currentTrack);
+      // Reuse the RNG state of the dying track.
+      positron.rngState = currentTrack.rngState;
+      positron.energy = posKinEnergy;
+      positron.dir.Set(dirSecondaryPos[0], dirSecondaryPos[1], dirSecondaryPos[2]);
+
+      // The current track is killed by not enqueuing into the next activeQueue.
+      break;
+    }
+    case 1: {
+      // Invoke Compton scattering of gamma.
+      constexpr double LowEnergyThreshold = 100 * copcore::units::eV;
+      if (energy < LowEnergyThreshold) {
+        activeQueue->push_back(slot);
+        continue;
+      }
+      const double origDirPrimary[] = {currentTrack.dir.x(), currentTrack.dir.y(), currentTrack.dir.z()};
+      double dirPrimary[3];
+      const double newEnergyGamma =
+          G4HepEmGammaInteractionCompton::SamplePhotonEnergyAndDirection(energy, dirPrimary, origDirPrimary, &rnge);
+      vecgeom::Vector3D<double> newDirGamma(dirPrimary[0], dirPrimary[1], dirPrimary[2]);
+
+      const double energyEl = energy - newEnergyGamma;
+      if (energyEl > LowEnergyThreshold) {
+        // Create a secondary electron and sample/compute directions.
+        Track &electron = secondaries.electrons.NextTrack();
+        atomicAdd(&scoring->secondaries, 1);
+
+        electron.InitAsSecondary(/*parent=*/currentTrack);
+        electron.rngState = newRNG;
+        electron.energy = energyEl;
+        electron.dir = energy * currentTrack.dir - newEnergyGamma * newDirGamma;
+        electron.dir.Normalize();
+      } else {
+        atomicAdd(&scoring->energyDeposit, energyEl);
+      }
+
+      // Check the new gamma energy and deposit if below threshold.
+      if (newEnergyGamma > LowEnergyThreshold) {
+        currentTrack.energy = newEnergyGamma;
+        currentTrack.dir = newDirGamma;
+
+        // The current track continues to live.
+        activeQueue->push_back(slot);
+      } else {
+        atomicAdd(&scoring->energyDeposit, newEnergyGamma);
+        // The current track is killed by not enqueuing into the next activeQueue.
+      }
+      break;
+    }
+    case 2: {
+      // Invoke photoelectric process: right now only absorb the gamma.
+      atomicAdd(&scoring->energyDeposit, energy);
+      // The current track is killed by not enqueuing into the next activeQueue.
+      break;
+    }
+    }
+  }
+}