From 734ab67298adc7e7bf9aa9c963fee3b5502d18da Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Thu, 26 Mar 2026 21:49:45 +0000
Subject: [PATCH 01/14] feat: add StandAloneGeant4Validation for CPU-only
 optical photon benchmark

Pure Geant4 optical photon simulation with no opticks/GPU dependencies.
Loads GDML, fires torch photons, collects hits via G4VSensitiveDetector,
saves g4_hits.npy for comparison with GPU output.

Supports multi-threaded mode via G4MTRunManager (-t N flag) by splitting
photons across multiple events. Sequential mode (-t 0) for reproducibility.
---
 src/CMakeLists.txt                 |  10 +-
 src/StandAloneGeant4Validation.cpp | 137 ++++++++++++++
 src/StandAloneGeant4Validation.h   | 287 +++++++++++++++++++++++++++++
 3 files changed, 433 insertions(+), 1 deletion(-)
 create mode 100644 src/StandAloneGeant4Validation.cpp
 create mode 100644 src/StandAloneGeant4Validation.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b58cd89c2..99c3aba0b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -77,6 +77,14 @@ target_include_directories(GPUPhotonFileSource PRIVATE
     $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
 )
 
+# StandAloneGeant4Validation - pure G4 optical photon simulation (no opticks GPU)
+add_executable(StandAloneGeant4Validation StandAloneGeant4Validation.cpp StandAloneGeant4Validation.h)
+target_link_libraries(StandAloneGeant4Validation gphox gphox_g4_deps)
+target_include_directories(StandAloneGeant4Validation PRIVATE
+    $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include>
+    $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
+)
+
 # simtox creates a numpy file with initial photons for simulation
 add_executable(simtox simtox.cpp)
 
@@ -87,7 +95,7 @@ target_include_directories(simtox PRIVATE
     $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
 )
 
-install(TARGETS consgeo simg4ox GPUCerenkov GPURaytrace GPUPhotonSource GPUPhotonSourceMinimal GPUPhotonFileSource simtox gphox gphox_g4_deps
+install(TARGETS consgeo simg4ox GPUCerenkov GPURaytrace GPUPhotonSource GPUPhotonSourceMinimal GPUPhotonFileSource StandAloneGeant4Validation simtox gphox gphox_g4_deps
     EXPORT ${PROJECT_NAME}Targets
     RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
     LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
diff --git a/src/StandAloneGeant4Validation.cpp b/src/StandAloneGeant4Validation.cpp
new file mode 100644
index 000000000..e31ff669b
--- /dev/null
+++ b/src/StandAloneGeant4Validation.cpp
@@ -0,0 +1,137 @@
+#include <string>
+#include <thread>
+
+#include <argparse/argparse.hpp>
+
+#include "FTFP_BERT.hh"
+#include "G4OpticalPhysics.hh"
+#include "G4MTRunManager.hh"
+#include "G4RunManager.hh"
+#include "G4VModularPhysicsList.hh"
+#include "G4UImanager.hh"
+
+#include "StandAloneGeant4Validation.h"
+#include "config.h"
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+    argparse::ArgumentParser program("StandAloneGeant4Validation", "0.0.0");
+
+    string gdml_file, config_name;
+    int num_threads = 0;
+
+    program.add_argument("-g", "--gdml")
+        .help("path to GDML file")
+        .default_value(string("geom.gdml"))
+        .nargs(1)
+        .store_into(gdml_file);
+
+    program.add_argument("-c", "--config")
+        .help("the name of a config file")
+        .default_value(string("dev"))
+        .nargs(1)
+        .store_into(config_name);
+
+    program.add_argument("-s", "--seed")
+        .help("fixed random seed (default: time-based)")
+        .scan<'i', long>();
+
+    program.add_argument("-t", "--threads")
+        .help("number of threads (0=sequential, default: hardware concurrency)")
+        .default_value(-1)
+        .scan<'i', int>()
+        .store_into(num_threads);
+
+    try
+    {
+        program.parse_args(argc, argv);
+    }
+    catch (const exception &err)
+    {
+        cerr << err.what() << endl;
+        cerr << program;
+        exit(EXIT_FAILURE);
+    }
+
+    long seed;
+    if (program.is_used("--seed"))
+        seed = program.get<long>("--seed");
+    else
+        seed = static_cast<long>(time(nullptr));
+
+    gphox::Config cfg(config_name);
+    int total_photons = cfg.torch.numphoton;
+
+    // Determine threading mode
+    bool use_mt = (num_threads != 0);
+    if (num_threads < 0)
+        num_threads = std::thread::hardware_concurrency();
+    if (num_threads < 1)
+        num_threads = 1;
+
+    // In MT mode: split photons across events, one event per thread-batch
+    // In sequential mode: one event with all photons (original behavior)
+    int num_events, photons_per_event;
+    if (use_mt)
+    {
+        num_events = num_threads * 4;  // 4 events per thread for load balancing
+        photons_per_event = (total_photons + num_events - 1) / num_events;
+        // Adjust num_events so we don't overshoot
+        num_events = (total_photons + photons_per_event - 1) / photons_per_event;
+    }
+    else
+    {
+        num_events = 1;
+        photons_per_event = total_photons;
+    }
+
+    int actual_photons = num_events * photons_per_event;
+
+    G4cout << "Random seed set to: " << seed << G4endl;
+    G4cout << "G4: " << total_photons << " photons, "
+           << num_events << " events x " << photons_per_event << " photons/event"
+           << " (" << actual_photons << " actual)"
+           << (use_mt ? ", " + to_string(num_threads) + " threads" : ", sequential")
+           << G4endl;
+
+    HitAccumulator accumulator;
+
+    G4VModularPhysicsList *physics = new FTFP_BERT;
+    physics->RegisterPhysics(new G4OpticalPhysics);
+
+    if (use_mt)
+    {
+        auto *run_mgr = new G4MTRunManager;
+        run_mgr->SetNumberOfThreads(num_threads);
+        run_mgr->SetUserInitialization(physics);
+        run_mgr->SetUserInitialization(new G4OnlyDetectorConstruction(gdml_file, &accumulator));
+        run_mgr->SetUserInitialization(
+            new G4OnlyActionInitialization(cfg, &accumulator, photons_per_event, num_events));
+        run_mgr->Initialize();
+
+        CLHEP::HepRandom::setTheSeed(seed);
+
+        G4cout << "G4: Starting MT run with " << num_events << " events..." << G4endl;
+        run_mgr->BeamOn(num_events);
+
+        delete run_mgr;
+    }
+    else
+    {
+        G4RunManager run_mgr;
+        run_mgr.SetUserInitialization(physics);
+        run_mgr.SetUserInitialization(new G4OnlyDetectorConstruction(gdml_file, &accumulator));
+        run_mgr.SetUserInitialization(
+            new G4OnlyActionInitialization(cfg, &accumulator, photons_per_event, num_events));
+        run_mgr.Initialize();
+
+        CLHEP::HepRandom::setTheSeed(seed);
+
+        G4cout << "G4: Starting sequential run..." << G4endl;
+        run_mgr.BeamOn(num_events);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/StandAloneGeant4Validation.h b/src/StandAloneGeant4Validation.h
new file mode 100644
index 000000000..3cfc8010f
--- /dev/null
+++ b/src/StandAloneGeant4Validation.h
@@ -0,0 +1,287 @@
+#pragma once
+
+#include <filesystem>
+#include <mutex>
+#include <vector>
+#include <cmath>
+
+#include "G4Event.hh"
+#include "G4GDMLParser.hh"
+#include "G4THitsCollection.hh"
+#include "G4VHit.hh"
+#include "G4OpticalPhoton.hh"
+#include "G4PhysicalConstants.hh"
+#include "G4PrimaryParticle.hh"
+#include "G4PrimaryVertex.hh"
+#include "G4Run.hh"
+#include "G4SDManager.hh"
+#include "G4SystemOfUnits.hh"
+#include "G4ThreeVector.hh"
+#include "G4Track.hh"
+#include "G4TrackStatus.hh"
+#include "G4UserEventAction.hh"
+#include "G4UserRunAction.hh"
+#include "G4VPhysicalVolume.hh"
+#include "G4VUserActionInitialization.hh"
+#include "G4VUserDetectorConstruction.hh"
+#include "G4VUserPrimaryGeneratorAction.hh"
+
+#include "sysrap/NP.hh"
+#include "sysrap/sphoton.h"
+
+#include "config.h"
+#include "torch.h"
+
+// ---- Global hit accumulator (thread-safe) ----
+
+struct HitAccumulator
+{
+    std::mutex mtx;
+    std::vector<sphoton> hits;
+
+    void AddHits(const std::vector<sphoton> &event_hits)
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        hits.insert(hits.end(), event_hits.begin(), event_hits.end());
+    }
+
+    void Save(const char *filename)
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        G4int num_hits = hits.size();
+        NP *arr = NP::Make<float>(num_hits, 4, 4);
+        for (int i = 0; i < num_hits; i++)
+        {
+            float *data = reinterpret_cast<float *>(&hits[i]);
+            std::copy(data, data + 16, arr->values<float>() + i * 16);
+        }
+        arr->save(filename);
+        delete arr;
+        G4cout << "G4: Saved " << num_hits << " total hits to " << filename << G4endl;
+    }
+};
+
+// ---- Sensitive Detector: collects optical photon hits per event ----
+
+struct G4PhotonHit : public G4VHit
+{
+    G4PhotonHit() = default;
+
+    G4PhotonHit(G4double energy, G4double time, G4ThreeVector position,
+                G4ThreeVector direction, G4ThreeVector polarization)
+        : photon()
+    {
+        photon.pos = {static_cast<float>(position.x()),
+                      static_cast<float>(position.y()),
+                      static_cast<float>(position.z())};
+        photon.time = time;
+        photon.mom = {static_cast<float>(direction.x()),
+                      static_cast<float>(direction.y()),
+                      static_cast<float>(direction.z())};
+        photon.pol = {static_cast<float>(polarization.x()),
+                      static_cast<float>(polarization.y()),
+                      static_cast<float>(polarization.z())};
+        photon.wavelength = h_Planck * c_light / (energy * CLHEP::eV);
+    }
+
+    void Print() override { G4cout << photon << G4endl; }
+
+    sphoton photon;
+};
+
+using G4PhotonHitsCollection = G4THitsCollection<G4PhotonHit>;
+
+struct G4PhotonSD : public G4VSensitiveDetector
+{
+    HitAccumulator *accumulator;
+
+    G4PhotonSD(G4String name, HitAccumulator *acc)
+        : G4VSensitiveDetector(name), accumulator(acc)
+    {
+        G4String HCname = name + "_HC";
+        collectionName.insert(HCname);
+    }
+
+    void Initialize(G4HCofThisEvent *hce) override
+    {
+        fHitsCollection = new G4PhotonHitsCollection(SensitiveDetectorName, collectionName[0]);
+        if (fHCID < 0)
+            fHCID = G4SDManager::GetSDMpointer()->GetCollectionID(collectionName[0]);
+        hce->AddHitsCollection(fHCID, fHitsCollection);
+    }
+
+    G4bool ProcessHits(G4Step *aStep, G4TouchableHistory *) override
+    {
+        G4Track *track = aStep->GetTrack();
+        if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition())
+            return false;
+
+        G4PhotonHit *hit = new G4PhotonHit(
+            track->GetTotalEnergy(),
+            track->GetGlobalTime(),
+            aStep->GetPostStepPoint()->GetPosition(),
+            aStep->GetPostStepPoint()->GetMomentumDirection(),
+            aStep->GetPostStepPoint()->GetPolarization());
+
+        fHitsCollection->insert(hit);
+        track->SetTrackStatus(fStopAndKill);
+        return true;
+    }
+
+    void EndOfEvent(G4HCofThisEvent *) override
+    {
+        G4int num_hits = fHitsCollection->entries();
+
+        std::vector<sphoton> event_hits;
+        event_hits.reserve(num_hits);
+        for (G4PhotonHit *hit : *fHitsCollection->GetVector())
+            event_hits.push_back(hit->photon);
+
+        accumulator->AddHits(event_hits);
+    }
+
+  private:
+    G4PhotonHitsCollection *fHitsCollection = nullptr;
+    G4int fHCID = -1;
+};
+
+// ---- Detector Construction: loads GDML, attaches SD ----
+
+struct G4OnlyDetectorConstruction : G4VUserDetectorConstruction
+{
+    G4OnlyDetectorConstruction(std::filesystem::path gdml_file, HitAccumulator *acc)
+        : gdml_file_(gdml_file), accumulator_(acc) {}
+
+    G4VPhysicalVolume *Construct() override
+    {
+        parser_.Read(gdml_file_.string(), false);
+        return parser_.GetWorldVolume();
+    }
+
+    void ConstructSDandField() override
+    {
+        G4SDManager *SDman = G4SDManager::GetSDMpointer();
+        const G4GDMLAuxMapType *auxmap = parser_.GetAuxMap();
+
+        for (auto const &[logVol, listType] : *auxmap)
+        {
+            for (auto const &auxtype : listType)
+            {
+                if (auxtype.type == "SensDet")
+                {
+                    G4String name = logVol->GetName() + "_" + auxtype.value;
+                    G4cout << "G4: Attaching SD to " << logVol->GetName() << G4endl;
+                    G4PhotonSD *sd = new G4PhotonSD(name, accumulator_);
+                    SDman->AddNewDetector(sd);
+                    logVol->SetSensitiveDetector(sd);
+                }
+            }
+        }
+    }
+
+  private:
+    std::filesystem::path gdml_file_;
+    G4GDMLParser parser_;
+    HitAccumulator *accumulator_;
+};
+
+// ---- Primary Generator: distributes photons across events ----
+
+struct G4OnlyPrimaryGenerator : G4VUserPrimaryGeneratorAction
+{
+    gphox::Config cfg;
+    int photons_per_event;
+
+    G4OnlyPrimaryGenerator(const gphox::Config &cfg, int photons_per_event)
+        : cfg(cfg), photons_per_event(photons_per_event) {}
+
+    void GeneratePrimaries(G4Event *event) override
+    {
+        int eventID = event->GetEventID();
+
+        // Generate photons for this event's batch using event-specific seed offset
+        storch t = cfg.torch;
+        t.numphoton = photons_per_event;
+        std::vector<sphoton> sphotons = generate_photons(t, photons_per_event, eventID);
+
+        for (const sphoton &p : sphotons)
+        {
+            G4ThreeVector position(p.pos.x, p.pos.y, p.pos.z);
+            G4ThreeVector direction(p.mom.x, p.mom.y, p.mom.z);
+            G4ThreeVector polarization(p.pol.x, p.pol.y, p.pol.z);
+            G4double wavelength_nm = p.wavelength;
+
+            G4PrimaryVertex *vertex = new G4PrimaryVertex(position, p.time);
+            G4double energy = h_Planck * c_light / (wavelength_nm * nm);
+
+            G4PrimaryParticle *particle = new G4PrimaryParticle(G4OpticalPhoton::Definition());
+            particle->SetKineticEnergy(energy);
+            particle->SetMomentumDirection(direction);
+            particle->SetPolarization(polarization);
+
+            vertex->SetPrimary(particle);
+            event->AddPrimaryVertex(vertex);
+        }
+    }
+};
+
+// ---- Event Action: reports per-event progress ----
+
+struct G4OnlyEventAction : G4UserEventAction
+{
+    int total_events;
+
+    G4OnlyEventAction(int total_events) : total_events(total_events) {}
+
+    void EndOfEventAction(const G4Event *event) override
+    {
+        int id = event->GetEventID();
+        if (id == 0 || (id + 1) % 10 == 0 || id + 1 == total_events)
+            G4cout << "G4: Event " << id + 1 << "/" << total_events << G4endl;
+    }
+};
+
+// ---- Run Action: saves merged hits at end ----
+
+struct G4OnlyRunAction : G4UserRunAction
+{
+    HitAccumulator *accumulator;
+
+    G4OnlyRunAction(HitAccumulator *acc) : accumulator(acc) {}
+
+    void EndOfRunAction(const G4Run *) override
+    {
+        if (G4Threading::IsMasterThread() || !G4Threading::IsMultithreadedApplication())
+        {
+            G4cout << "G4: Total accumulated hits: " << accumulator->hits.size() << G4endl;
+            accumulator->Save("g4_hits.npy");
+        }
+    }
+};
+
+// ---- Action Initialization (required for MT) ----
+
+struct G4OnlyActionInitialization : G4VUserActionInitialization
+{
+    gphox::Config cfg;
+    HitAccumulator *accumulator;
+    int photons_per_event;
+    int num_events;
+
+    G4OnlyActionInitialization(const gphox::Config &cfg, HitAccumulator *acc,
+                               int photons_per_event, int num_events)
+        : cfg(cfg), accumulator(acc), photons_per_event(photons_per_event),
+          num_events(num_events) {}
+
+    void BuildForMaster() const override
+    {
+        SetUserAction(new G4OnlyRunAction(accumulator));
+    }
+
+    void Build() const override
+    {
+        SetUserAction(new G4OnlyPrimaryGenerator(cfg, photons_per_event));
+        SetUserAction(new G4OnlyEventAction(num_events));
+        SetUserAction(new G4OnlyRunAction(accumulator));
+    }
+};

From 269c2aebdf6184b5212b65327ebaa59bd40c8b78 Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sat, 28 Mar 2026 20:53:22 +0000
Subject: [PATCH 02/14] feat: add --aligned mode to StandAloneGeant4Validation

Adds photon-by-photon aligned comparison with GPU via U4Random
precooked curand sequences. Includes PhotonFateAccumulator for
indexed g4_photon.npy output, AlignedOpticalPhysics with
InstrumentedG4OpBoundaryProcess, and U4Recorder integration
for SEvt lifecycle and random alignment.
---
 src/StandAloneGeant4Validation.cpp |  33 ++-
 src/StandAloneGeant4Validation.h   | 342 ++++++++++++++++++++++++++++-
 2 files changed, 365 insertions(+), 10 deletions(-)

diff --git a/src/StandAloneGeant4Validation.cpp b/src/StandAloneGeant4Validation.cpp
index e31ff669b..deea41071 100644
--- a/src/StandAloneGeant4Validation.cpp
+++ b/src/StandAloneGeant4Validation.cpp
@@ -44,6 +44,11 @@ int main(int argc, char **argv)
         .scan<'i', int>()
         .store_into(num_threads);
 
+    program.add_argument("--aligned")
+        .help("enable photon-by-photon aligned comparison with GPU (forces sequential)")
+        .default_value(false)
+        .implicit_value(true);
+
     try
     {
         program.parse_args(argc, argv);
@@ -61,9 +66,15 @@ int main(int argc, char **argv)
     else
         seed = static_cast<long>(time(nullptr));
 
+    bool aligned = program.get<bool>("--aligned");
+
     gphox::Config cfg(config_name);
     int total_photons = cfg.torch.numphoton;
 
+    // Aligned mode forces sequential (U4Random is single-threaded)
+    if (aligned)
+        num_threads = 0;
+
     // Determine threading mode
     bool use_mt = (num_threads != 0);
     if (num_threads < 0)
@@ -97,9 +108,16 @@ int main(int argc, char **argv)
            << G4endl;
 
     HitAccumulator accumulator;
+    PhotonFateAccumulator fate;
+
+    if (aligned)
+        fate.Resize(total_photons);
 
     G4VModularPhysicsList *physics = new FTFP_BERT;
-    physics->RegisterPhysics(new G4OpticalPhysics);
+    if (aligned)
+        physics->RegisterPhysics(new AlignedOpticalPhysics);
+    else
+        physics->RegisterPhysics(new G4OpticalPhysics);
 
     if (use_mt)
     {
@@ -108,7 +126,7 @@ int main(int argc, char **argv)
         run_mgr->SetUserInitialization(physics);
         run_mgr->SetUserInitialization(new G4OnlyDetectorConstruction(gdml_file, &accumulator));
         run_mgr->SetUserInitialization(
-            new G4OnlyActionInitialization(cfg, &accumulator, photons_per_event, num_events));
+            new G4OnlyActionInitialization(cfg, &accumulator, &fate, photons_per_event, num_events, aligned));
         run_mgr->Initialize();
 
         CLHEP::HepRandom::setTheSeed(seed);
@@ -124,7 +142,16 @@ int main(int argc, char **argv)
         run_mgr.SetUserInitialization(physics);
         run_mgr.SetUserInitialization(new G4OnlyDetectorConstruction(gdml_file, &accumulator));
         run_mgr.SetUserInitialization(
-            new G4OnlyActionInitialization(cfg, &accumulator, photons_per_event, num_events));
+            new G4OnlyActionInitialization(cfg, &accumulator, &fate, photons_per_event, num_events, aligned));
+
+        if (aligned)
+        {
+            G4cout << "G4: Aligned mode — configuring SEvt and U4Random" << G4endl;
+            setenv("SEvent__MakeGenstep_num_ph", std::to_string(total_photons).c_str(), 1);
+            setenv("OPTICKS_MAX_BOUNCE", "1000", 0);
+            U4Random::Create();
+        }
+
         run_mgr.Initialize();
 
         CLHEP::HepRandom::setTheSeed(seed);
diff --git a/src/StandAloneGeant4Validation.h b/src/StandAloneGeant4Validation.h
index 3cfc8010f..3bcbbc7d6 100644
--- a/src/StandAloneGeant4Validation.h
+++ b/src/StandAloneGeant4Validation.h
@@ -21,10 +21,24 @@
 #include "G4TrackStatus.hh"
 #include "G4UserEventAction.hh"
 #include "G4UserRunAction.hh"
+#include "G4UserSteppingAction.hh"
+#include "G4UserTrackingAction.hh"
 #include "G4VPhysicalVolume.hh"
 #include "G4VUserActionInitialization.hh"
 #include "G4VUserDetectorConstruction.hh"
 #include "G4VUserPrimaryGeneratorAction.hh"
+#include "G4OpBoundaryProcess.hh"
+#include "G4ProcessManager.hh"
+#include "G4VPhysicsConstructor.hh"
+#include "G4OpWLS.hh"
+
+#include "ShimG4OpAbsorption.hh"
+#include "ShimG4OpRayleigh.hh"
+#include "InstrumentedG4OpBoundaryProcess.hh"
+#include "U4Random.hh"
+#include "U4Recorder.hh"
+#include "SEvt.hh"
+#include "SEventConfig.hh"
 
 #include "sysrap/NP.hh"
 #include "sysrap/sphoton.h"
@@ -225,19 +239,303 @@ struct G4OnlyPrimaryGenerator : G4VUserPrimaryGeneratorAction
     }
 };
 
+// ---- Photon fate accumulator: tracks ALL photon final states ----
+
+struct PhotonFateAccumulator
+{
+    std::mutex mtx;
+    std::vector<sphoton> photons;
+    bool indexed = false;  // true for aligned mode: store by photon index
+
+    // Opticks flag enum values
+    static constexpr unsigned TORCH            = 0x0004;
+    static constexpr unsigned BULK_ABSORB      = 0x0008;
+    static constexpr unsigned BULK_REEMIT      = 0x0010;
+    static constexpr unsigned BULK_SCATTER     = 0x0020;
+    static constexpr unsigned SURFACE_DETECT   = 0x0040;
+    static constexpr unsigned SURFACE_ABSORB   = 0x0080;
+    static constexpr unsigned SURFACE_DREFLECT = 0x0100;
+    static constexpr unsigned SURFACE_SREFLECT = 0x0200;
+    static constexpr unsigned BOUNDARY_REFLECT = 0x0400;
+    static constexpr unsigned BOUNDARY_TRANSMIT= 0x0800;
+    static constexpr unsigned MISS             = 0x8000;
+
+    void Resize(int n)
+    {
+        photons.resize(n);
+        indexed = true;
+    }
+
+    void Set(int idx, const sphoton& p)
+    {
+        if (idx >= 0 && idx < (int)photons.size())
+            photons[idx] = p;
+    }
+
+    void Add(const sphoton& p)
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        photons.push_back(p);
+    }
+
+    void Save(const char* filename)
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        int n = photons.size();
+        NP* arr = NP::Make<float>(n, 4, 4);
+        for (int i = 0; i < n; i++)
+        {
+            float* data = reinterpret_cast<float*>(&photons[i]);
+            std::copy(data, data + 16, arr->values<float>() + i * 16);
+        }
+        arr->save(filename);
+        delete arr;
+        G4cout << "G4: Saved " << n << " photon fates to " << filename << G4endl;
+    }
+};
+
+// ---- Stepping Action: tracks photon fates with opticks-compatible flags ----
+
+struct G4OnlySteppingAction : G4UserSteppingAction
+{
+    PhotonFateAccumulator* fate;
+    bool aligned;
+    U4Recorder* recorder = nullptr;
+    std::map<std::string, int> proc_death_counts;
+    std::map<int, int> boundary_status_counts;
+    std::mutex count_mtx;
+
+    G4OnlySteppingAction(PhotonFateAccumulator* f, bool aligned_ = false)
+        : fate(f), aligned(aligned_) {}
+
+    ~G4OnlySteppingAction()
+    {
+        std::lock_guard<std::mutex> lock(count_mtx);
+        if (!proc_death_counts.empty())
+        {
+            G4cout << "\nG4: Photon death process summary:" << G4endl;
+            for (auto& [name, count] : proc_death_counts)
+                G4cout << "  " << name << ": " << count << G4endl;
+        }
+        if (!boundary_status_counts.empty())
+        {
+            G4cout << "\nG4: OpBoundary status counts (all steps):" << G4endl;
+            const char* bnames[] = {
+                "Undefined","Transmission","FresnelRefraction","FresnelReflection",
+                "TotalInternalReflection","LambertianReflection","LobeReflection",
+                "SpikeReflection","BackScattering","Absorption","Detection",
+                "NotAtBoundary","SameMaterial","StepTooSmall","NoRINDEX",
+                "PolishedLumirrorAirReflection","PolishedLumirrorGlueReflection",
+                "PolishedAirReflection","PolishedTeflonAirReflection",
+                "PolishedTiOAirReflection","PolishedTyvekAirReflection",
+                "PolishedVM2000AirReflection","PolishedVM2000GlueReflection",
+                "EtchedLumirrorAirReflection","EtchedLumirrorGlueReflection",
+                "EtchedAirReflection","EtchedTeflonAirReflection",
+                "EtchedTiOAirReflection","EtchedTyvekAirReflection",
+                "EtchedVM2000AirReflection","EtchedVM2000GlueReflection",
+                "GroundLumirrorAirReflection","GroundLumirrorGlueReflection",
+                "GroundAirReflection","GroundTeflonAirReflection",
+                "GroundTiOAirReflection","GroundTyvekAirReflection",
+                "GroundVM2000AirReflection","GroundVM2000GlueReflection",
+                "Dichroic","CoatedDielectricReflection","CoatedDielectricRefraction",
+                "CoatedDielectricFrustratedTransmission"
+            };
+            for (auto& [st, count] : boundary_status_counts)
+            {
+                const char* nm = (st >= 0 && st < 43) ? bnames[st] : "?";
+                G4cout << "  " << nm << "(" << st << "): " << count << G4endl;
+            }
+        }
+    }
+
+    void UserSteppingAction(const G4Step* aStep) override
+    {
+        // Forward to U4Recorder first for random alignment
+        if (recorder)
+            recorder->UserSteppingAction(aStep);
+
+        G4Track* track = aStep->GetTrack();
+        if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition())
+            return;
+
+        G4StepPoint* post = aStep->GetPostStepPoint();
+        G4TrackStatus status = track->GetTrackStatus();
+
+        // Find the OpBoundary process to get its status (for ALL steps)
+        G4OpBoundaryProcess* boundary = nullptr;
+        G4ProcessManager* pm = track->GetDefinition()->GetProcessManager();
+        for (int i = 0; i < pm->GetPostStepProcessVector()->entries(); i++)
+        {
+            G4VProcess* p = (*pm->GetPostStepProcessVector())[i];
+            boundary = dynamic_cast<G4OpBoundaryProcess*>(p);
+            if (boundary) break;
+        }
+
+        G4OpBoundaryProcessStatus bStatus = boundary ? boundary->GetStatus() : Undefined;
+
+        // Count boundary status for ALL steps
+        if (boundary && bStatus != NotAtBoundary && bStatus != Undefined && bStatus != StepTooSmall)
+        {
+            std::lock_guard<std::mutex> lock(count_mtx);
+            boundary_status_counts[int(bStatus)]++;
+        }
+
+        // Only record photon state when the photon is about to die
+        if (status != fStopAndKill && status != fStopButAlive)
+            return;
+
+        // Identify the process
+        const G4VProcess* proc = post->GetProcessDefinedStep();
+        G4String procName = proc ? proc->GetProcessName() : "Unknown";
+
+        // Build detailed key for counting
+        std::string key = procName;
+        if (procName == "OpBoundary" && boundary)
+            key += "(" + std::to_string(int(bStatus)) + ")";
+        key += (status == fStopAndKill ? "/Kill" : "/Alive");
+
+        {
+            std::lock_guard<std::mutex> lock(count_mtx);
+            proc_death_counts[key]++;
+        }
+
+        // Map to opticks flag
+        unsigned flag = 0;
+
+        if (procName == "OpAbsorption")
+        {
+            flag = PhotonFateAccumulator::BULK_ABSORB;
+        }
+        else if (procName == "OpWLS")
+        {
+            flag = PhotonFateAccumulator::BULK_REEMIT;
+        }
+        else if (procName == "OpBoundary" && boundary)
+        {
+            switch (bStatus)
+            {
+                case Detection:       flag = PhotonFateAccumulator::SURFACE_DETECT; break;
+                case Absorption:      flag = PhotonFateAccumulator::SURFACE_ABSORB; break;
+                case FresnelReflection:
+                case TotalInternalReflection:
+                                      flag = PhotonFateAccumulator::BOUNDARY_REFLECT; break;
+                case FresnelRefraction: flag = PhotonFateAccumulator::BOUNDARY_TRANSMIT; break;
+                case LambertianReflection:
+                case LobeReflection:  flag = PhotonFateAccumulator::SURFACE_DREFLECT; break;
+                case SpikeReflection: flag = PhotonFateAccumulator::SURFACE_SREFLECT; break;
+                case BackScattering:  flag = PhotonFateAccumulator::SURFACE_DREFLECT; break;
+                default:              flag = PhotonFateAccumulator::SURFACE_ABSORB; break;
+            }
+        }
+        else if (procName == "Transportation")
+        {
+            // Check if an SD killed this photon (SURFACE_DETECT)
+            G4StepPoint* pre = aStep->GetPreStepPoint();
+            G4VPhysicalVolume* preVol = pre->GetPhysicalVolume();
+            G4VPhysicalVolume* postVol = post->GetPhysicalVolume();
+            G4LogicalVolume* preLog = preVol ? preVol->GetLogicalVolume() : nullptr;
+            G4LogicalVolume* postLog = postVol ? postVol->GetLogicalVolume() : nullptr;
+            bool sd_pre = preLog && preLog->GetSensitiveDetector();
+            bool sd_post = postLog && postLog->GetSensitiveDetector();
+            if (sd_pre || sd_post)
+                flag = PhotonFateAccumulator::SURFACE_DETECT;
+            else
+                flag = PhotonFateAccumulator::BOUNDARY_TRANSMIT;
+        }
+
+        if (flag == 0) flag = PhotonFateAccumulator::MISS; // catch-all
+
+        // Build sphoton with the final state
+        G4ThreeVector pos = post->GetPosition();
+        G4ThreeVector mom = post->GetMomentumDirection();
+        G4ThreeVector pol = post->GetPolarization();
+        G4double time = post->GetGlobalTime();
+        G4double energy = post->GetTotalEnergy();
+
+        sphoton p = {};
+        p.pos = { float(pos.x()), float(pos.y()), float(pos.z()) };
+        p.time = float(time);
+        p.mom = { float(mom.x()), float(mom.y()), float(mom.z()) };
+        p.pol = { float(pol.x()), float(pol.y()), float(pol.z()) };
+        p.wavelength = (energy > 0) ? float(h_Planck * c_light / (energy * CLHEP::eV)) : 0.f;
+
+        p.orient_boundary_flag = flag & 0xFFFF;
+        p.flagmask = flag;
+
+        if (aligned && fate->indexed)
+        {
+            int photon_idx = track->GetTrackID() - 1;  // G4 trackIDs are 1-based
+            fate->Set(photon_idx, p);
+        }
+        else
+        {
+            fate->Add(p);
+        }
+    }
+};
+
+// ---- Tracking Action: per-photon RNG sync for aligned mode ----
+
+struct G4OnlyTrackingAction : G4UserTrackingAction
+{
+    U4Recorder* recorder = nullptr;
+
+    void PreUserTrackingAction(const G4Track* track) override
+    {
+        if (recorder)
+            recorder->PreUserTrackingAction(track);
+    }
+
+    void PostUserTrackingAction(const G4Track* track) override
+    {
+        if (recorder)
+            recorder->PostUserTrackingAction(track);
+    }
+};
+
+// ---- AlignedOpticalPhysics: replaces G4OpticalPhysics with instrumented processes ----
+
+struct AlignedOpticalPhysics : G4VPhysicsConstructor
+{
+    AlignedOpticalPhysics() : G4VPhysicsConstructor("AlignedOptical") {}
+
+    void ConstructParticle() override {}  // optical photon already defined by FTFP_BERT
+
+    void ConstructProcess() override
+    {
+        auto* pm = G4OpticalPhoton::OpticalPhoton()->GetProcessManager();
+        pm->AddDiscreteProcess(new ShimG4OpAbsorption());
+        pm->AddDiscreteProcess(new ShimG4OpRayleigh());
+        pm->AddDiscreteProcess(new InstrumentedG4OpBoundaryProcess());
+        pm->AddDiscreteProcess(new G4OpWLS());
+    }
+};
+
 // ---- Event Action: reports per-event progress ----
 
 struct G4OnlyEventAction : G4UserEventAction
 {
     int total_events;
+    U4Recorder* recorder = nullptr;
 
     G4OnlyEventAction(int total_events) : total_events(total_events) {}
 
+    void BeginOfEventAction(const G4Event *event) override
+    {
+        if (recorder)
+        {
+            SEvt::AddTorchGenstep();
+            recorder->BeginOfEventAction_(event->GetEventID());
+        }
+    }
+
     void EndOfEventAction(const G4Event *event) override
     {
         int id = event->GetEventID();
         if (id == 0 || (id + 1) % 10 == 0 || id + 1 == total_events)
             G4cout << "G4: Event " << id + 1 << "/" << total_events << G4endl;
+        if (recorder)
+            recorder->EndOfEventAction_(id);
     }
 };
 
@@ -246,8 +544,10 @@ struct G4OnlyEventAction : G4UserEventAction
 struct G4OnlyRunAction : G4UserRunAction
 {
     HitAccumulator *accumulator;
+    PhotonFateAccumulator *fate;
 
-    G4OnlyRunAction(HitAccumulator *acc) : accumulator(acc) {}
+    G4OnlyRunAction(HitAccumulator *acc, PhotonFateAccumulator *f = nullptr)
+        : accumulator(acc), fate(f) {}
 
     void EndOfRunAction(const G4Run *) override
     {
@@ -255,6 +555,11 @@ struct G4OnlyRunAction : G4UserRunAction
         {
             G4cout << "G4: Total accumulated hits: " << accumulator->hits.size() << G4endl;
             accumulator->Save("g4_hits.npy");
+            if (fate)
+            {
+                G4cout << "G4: Total photon fates: " << fate->photons.size() << G4endl;
+                fate->Save("g4_photon.npy");
+            }
         }
     }
 };
@@ -265,23 +570,46 @@ struct G4OnlyActionInitialization : G4VUserActionInitialization
 {
     gphox::Config cfg;
     HitAccumulator *accumulator;
+    PhotonFateAccumulator *fate;
     int photons_per_event;
     int num_events;
+    bool aligned;
 
     G4OnlyActionInitialization(const gphox::Config &cfg, HitAccumulator *acc,
-                               int photons_per_event, int num_events)
-        : cfg(cfg), accumulator(acc), photons_per_event(photons_per_event),
-          num_events(num_events) {}
+                               PhotonFateAccumulator *f,
+                               int photons_per_event, int num_events,
+                               bool aligned_ = false)
+        : cfg(cfg), accumulator(acc), fate(f),
+          photons_per_event(photons_per_event),
+          num_events(num_events), aligned(aligned_) {}
 
     void BuildForMaster() const override
     {
-        SetUserAction(new G4OnlyRunAction(accumulator));
+        SetUserAction(new G4OnlyRunAction(accumulator, fate));
     }
 
     void Build() const override
     {
         SetUserAction(new G4OnlyPrimaryGenerator(cfg, photons_per_event));
-        SetUserAction(new G4OnlyEventAction(num_events));
-        SetUserAction(new G4OnlyRunAction(accumulator));
+
+        auto* evt_action = new G4OnlyEventAction(num_events);
+        auto* stepping = new G4OnlySteppingAction(fate, aligned);
+
+        if (aligned)
+        {
+            U4Recorder* rec = U4Recorder::Get();
+            if (!rec) rec = new U4Recorder();
+
+            evt_action->recorder = rec;
+            stepping->recorder = rec;
+
+            auto* tracking = new G4OnlyTrackingAction();
+            tracking->recorder = rec;
+            SetUserAction(tracking);
+        }
+
+        SetUserAction(evt_action);
+        SetUserAction(new G4OnlyRunAction(accumulator, fate));
+        SetUserAction(stepping);
     }
 };

From 7c12be08ffb0146fda010286566a358db7a3e255 Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sat, 28 Mar 2026 21:11:28 +0000
Subject: [PATCH 03/14] simplify --aligned mode: drop
 U4Recorder/InstrumentedG4OpBoundaryProcess dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use standard G4OpticalPhysics and direct U4Random::SetSequenceIndex.
Only requires linking U4 (for U4Random) — no u4/ source modifications needed.
---
 src/StandAloneGeant4Validation.cpp |  9 +---
 src/StandAloneGeant4Validation.h   | 79 +++++-------------------------
 2 files changed, 13 insertions(+), 75 deletions(-)

diff --git a/src/StandAloneGeant4Validation.cpp b/src/StandAloneGeant4Validation.cpp
index deea41071..91d9b080b 100644
--- a/src/StandAloneGeant4Validation.cpp
+++ b/src/StandAloneGeant4Validation.cpp
@@ -114,10 +114,7 @@ int main(int argc, char **argv)
         fate.Resize(total_photons);
 
     G4VModularPhysicsList *physics = new FTFP_BERT;
-    if (aligned)
-        physics->RegisterPhysics(new AlignedOpticalPhysics);
-    else
-        physics->RegisterPhysics(new G4OpticalPhysics);
+    physics->RegisterPhysics(new G4OpticalPhysics);
 
     if (use_mt)
     {
@@ -146,9 +143,7 @@ int main(int argc, char **argv)
 
         if (aligned)
         {
-            G4cout << "G4: Aligned mode — configuring SEvt and U4Random" << G4endl;
-            setenv("SEvent__MakeGenstep_num_ph", std::to_string(total_photons).c_str(), 1);
-            setenv("OPTICKS_MAX_BOUNCE", "1000", 0);
+            G4cout << "G4: Aligned mode — creating U4Random" << G4endl;
             U4Random::Create();
         }
 
diff --git a/src/StandAloneGeant4Validation.h b/src/StandAloneGeant4Validation.h
index 3bcbbc7d6..d1c0f0bdb 100644
--- a/src/StandAloneGeant4Validation.h
+++ b/src/StandAloneGeant4Validation.h
@@ -29,16 +29,8 @@
 #include "G4VUserPrimaryGeneratorAction.hh"
 #include "G4OpBoundaryProcess.hh"
 #include "G4ProcessManager.hh"
-#include "G4VPhysicsConstructor.hh"
-#include "G4OpWLS.hh"
 
-#include "ShimG4OpAbsorption.hh"
-#include "ShimG4OpRayleigh.hh"
-#include "InstrumentedG4OpBoundaryProcess.hh"
 #include "U4Random.hh"
-#include "U4Recorder.hh"
-#include "SEvt.hh"
-#include "SEventConfig.hh"
 
 #include "sysrap/NP.hh"
 #include "sysrap/sphoton.h"
@@ -300,7 +292,6 @@ struct G4OnlySteppingAction : G4UserSteppingAction
 {
     PhotonFateAccumulator* fate;
     bool aligned;
-    U4Recorder* recorder = nullptr;
     std::map<std::string, int> proc_death_counts;
     std::map<int, int> boundary_status_counts;
     std::mutex count_mtx;
@@ -350,10 +341,6 @@ struct G4OnlySteppingAction : G4UserSteppingAction
 
     void UserSteppingAction(const G4Step* aStep) override
     {
-        // Forward to U4Recorder first for random alignment
-        if (recorder)
-            recorder->UserSteppingAction(aStep);
-
         G4Track* track = aStep->GetTrack();
         if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition())
             return;
@@ -478,36 +465,19 @@ struct G4OnlySteppingAction : G4UserSteppingAction
 
 struct G4OnlyTrackingAction : G4UserTrackingAction
 {
-    U4Recorder* recorder = nullptr;
-
     void PreUserTrackingAction(const G4Track* track) override
     {
-        if (recorder)
-            recorder->PreUserTrackingAction(track);
+        if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition())
+            return;
+        int photon_idx = track->GetTrackID() - 1;  // G4 trackIDs are 1-based
+        U4Random::SetSequenceIndex(photon_idx);
     }
 
     void PostUserTrackingAction(const G4Track* track) override
     {
-        if (recorder)
-            recorder->PostUserTrackingAction(track);
-    }
-};
-
-// ---- AlignedOpticalPhysics: replaces G4OpticalPhysics with instrumented processes ----
-
-struct AlignedOpticalPhysics : G4VPhysicsConstructor
-{
-    AlignedOpticalPhysics() : G4VPhysicsConstructor("AlignedOptical") {}
-
-    void ConstructParticle() override {}  // optical photon already defined by FTFP_BERT
-
-    void ConstructProcess() override
-    {
-        auto* pm = G4OpticalPhoton::OpticalPhoton()->GetProcessManager();
-        pm->AddDiscreteProcess(new ShimG4OpAbsorption());
-        pm->AddDiscreteProcess(new ShimG4OpRayleigh());
-        pm->AddDiscreteProcess(new InstrumentedG4OpBoundaryProcess());
-        pm->AddDiscreteProcess(new G4OpWLS());
+        if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition())
+            return;
+        U4Random::SetSequenceIndex(-1);
     }
 };
 
@@ -516,26 +486,14 @@ struct AlignedOpticalPhysics : G4VPhysicsConstructor
 struct G4OnlyEventAction : G4UserEventAction
 {
     int total_events;
-    U4Recorder* recorder = nullptr;
 
     G4OnlyEventAction(int total_events) : total_events(total_events) {}
 
-    void BeginOfEventAction(const G4Event *event) override
-    {
-        if (recorder)
-        {
-            SEvt::AddTorchGenstep();
-            recorder->BeginOfEventAction_(event->GetEventID());
-        }
-    }
-
     void EndOfEventAction(const G4Event *event) override
     {
         int id = event->GetEventID();
         if (id == 0 || (id + 1) % 10 == 0 || id + 1 == total_events)
             G4cout << "G4: Event " << id + 1 << "/" << total_events << G4endl;
-        if (recorder)
-            recorder->EndOfEventAction_(id);
     }
 };
 
@@ -591,25 +549,10 @@ struct G4OnlyActionInitialization : G4VUserActionInitialization
     void Build() const override
     {
         SetUserAction(new G4OnlyPrimaryGenerator(cfg, photons_per_event));
-
-        auto* evt_action = new G4OnlyEventAction(num_events);
-        auto* stepping = new G4OnlySteppingAction(fate, aligned);
-
-        if (aligned)
-        {
-            U4Recorder* rec = U4Recorder::Get();
-            if (!rec) rec = new U4Recorder();
-
-            evt_action->recorder = rec;
-            stepping->recorder = rec;
-
-            auto* tracking = new G4OnlyTrackingAction();
-            tracking->recorder = rec;
-            SetUserAction(tracking);
-        }
-
-        SetUserAction(evt_action);
+        SetUserAction(new G4OnlyEventAction(num_events));
         SetUserAction(new G4OnlyRunAction(accumulator, fate));
-        SetUserAction(stepping);
+        SetUserAction(new G4OnlySteppingAction(fate, aligned));
+        if (aligned)
+            SetUserAction(new G4OnlyTrackingAction());
     }
 };

From da61a75b9fba1138d8d82c15f0334bf5749f2f7d Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sat, 28 Mar 2026 21:21:08 +0000
Subject: [PATCH 04/14] add AlignedOpticalPhysics with ShimG4Op* for better
 RNILL alignment

Uses ShimG4OpAbsorption/ShimG4OpRayleigh instead of standard
G4OpticalPhysics when --aligned. Improves match from 95.7% to 97.7%
by matching the GPU's RNILL random consumption pattern.
---
 src/StandAloneGeant4Validation.cpp |  5 ++++-
 src/StandAloneGeant4Validation.h   | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/StandAloneGeant4Validation.cpp b/src/StandAloneGeant4Validation.cpp
index 91d9b080b..bd90a3d24 100644
--- a/src/StandAloneGeant4Validation.cpp
+++ b/src/StandAloneGeant4Validation.cpp
@@ -114,7 +114,10 @@ int main(int argc, char **argv)
         fate.Resize(total_photons);
 
     G4VModularPhysicsList *physics = new FTFP_BERT;
-    physics->RegisterPhysics(new G4OpticalPhysics);
+    if (aligned)
+        physics->RegisterPhysics(new AlignedOpticalPhysics);
+    else
+        physics->RegisterPhysics(new G4OpticalPhysics);
 
     if (use_mt)
     {
diff --git a/src/StandAloneGeant4Validation.h b/src/StandAloneGeant4Validation.h
index d1c0f0bdb..9dcaced17 100644
--- a/src/StandAloneGeant4Validation.h
+++ b/src/StandAloneGeant4Validation.h
@@ -29,7 +29,11 @@
 #include "G4VUserPrimaryGeneratorAction.hh"
 #include "G4OpBoundaryProcess.hh"
 #include "G4ProcessManager.hh"
+#include "G4VPhysicsConstructor.hh"
+#include "G4OpWLS.hh"
 
+#include "ShimG4OpAbsorption.hh"
+#include "ShimG4OpRayleigh.hh"
 #include "U4Random.hh"
 
 #include "sysrap/NP.hh"
@@ -481,6 +485,22 @@ struct G4OnlyTrackingAction : G4UserTrackingAction
     }
 };
 
+// ---- AlignedOpticalPhysics: uses Shim processes for precise RNILL matching ----
+
+struct AlignedOpticalPhysics : G4VPhysicsConstructor
+{
+    AlignedOpticalPhysics() : G4VPhysicsConstructor("AlignedOptical") {}
+    void ConstructParticle() override {}
+    void ConstructProcess() override
+    {
+        auto* pm = G4OpticalPhoton::OpticalPhoton()->GetProcessManager();
+        pm->AddDiscreteProcess(new ShimG4OpAbsorption());
+        pm->AddDiscreteProcess(new ShimG4OpRayleigh());
+        pm->AddDiscreteProcess(new G4OpBoundaryProcess());
+        pm->AddDiscreteProcess(new G4OpWLS());
+    }
+};
+
 // ---- Event Action: reports per-event progress ----
 
 struct G4OnlyEventAction : G4UserEventAction

From 5a24626bfc71252e629839c8fce0087d0b840af0 Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sat, 28 Mar 2026 21:26:26 +0000
Subject: [PATCH 05/14] add standalone precooked curand sequence generator

Generates Philox random streams matching GPU seeding for U4Random
aligned mode. 113 lines, compiles with nvcc, no opticks dependencies
beyond NP.hh.
---
 tools/generate_precooked_rng.cu | 113 ++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 tools/generate_precooked_rng.cu

diff --git a/tools/generate_precooked_rng.cu b/tools/generate_precooked_rng.cu
new file mode 100644
index 000000000..79cb2910d
--- /dev/null
+++ b/tools/generate_precooked_rng.cu
@@ -0,0 +1,113 @@
+/**
+generate_precooked_rng.cu
+==========================
+
+Generates precooked curand Philox sequences for U4Random aligned mode.
+Each photon gets its own random stream matching the GPU simulation.
+
+Build:
+    nvcc -o generate_precooked_rng tools/generate_precooked_rng.cu \
+         -I. -I/opt/eic-opticks/include/eic-opticks -lcurand -std=c++17
+
+Usage:
+    ./generate_precooked_rng [num_photons] [num_randoms_per_photon]
+    Defaults: 100000 photons, 256 randoms each (nj=16, nk=16)
+
+Output:
+    ~/.opticks/precooked/QSimTest/rng_sequence/
+        rng_sequence_f_ni<NI>_nj<NJ>_nk<NK>_tranche<NI>/
+            rng_sequence_f_ni<NI>_nj<NJ>_nk<NK>_ioffset000000.npy
+*/
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <sys/stat.h>
+
+#include <curand_kernel.h>
+#include "sysrap/NP.hh"
+
+__global__ void generate_sequences(float* out, unsigned ni, unsigned nv, unsigned id_offset)
+{
+    unsigned idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx >= ni) return;
+
+    unsigned photon_idx = id_offset + idx;
+
+    // Match GPU simulation: curand_init(seed=0, subsequence=photon_idx, offset=0)
+    curandStatePhilox4_32_10_t rng;
+    curand_init(0ULL, (unsigned long long)photon_idx, 0ULL, &rng);
+
+    float* row = out + idx * nv;
+    for (unsigned j = 0; j < nv; j++)
+        row[j] = curand_uniform(&rng);
+}
+
+static void mkdirp(const char* path)
+{
+    char tmp[1024];
+    snprintf(tmp, sizeof(tmp), "%s", path);
+    for (char* p = tmp + 1; *p; p++)
+    {
+        if (*p == '/') { *p = 0; mkdir(tmp, 0755); *p = '/'; }
+    }
+    mkdir(tmp, 0755);
+}
+
+int main(int argc, char** argv)
+{
+    unsigned ni = 100000;
+    unsigned nj = 16;
+    unsigned nk = 16;
+
+    if (argc > 1) ni = atoi(argv[1]);
+    if (argc > 2)
+    {
+        unsigned total = atoi(argv[2]);
+        nj = 1; nk = total;
+        for (unsigned f = 2; f * f <= total; f++)
+        {
+            if (total % f == 0 && f <= 64) { nj = f; nk = total / f; }
+        }
+    }
+
+    unsigned nv = nj * nk;
+    printf("Generating precooked curand Philox sequences:\n");
+    printf("  photons: %u, randoms/photon: %u (nj=%u, nk=%u), memory: %.1f MB\n",
+           ni, nv, nj, nk, (double)ni * nv * sizeof(float) / (1024 * 1024));
+
+    const char* home = getenv("HOME");
+    char dirpath[512], filename[256], fullpath[768];
+
+    snprintf(dirpath, sizeof(dirpath),
+        "%s/.opticks/precooked/QSimTest/rng_sequence/rng_sequence_f_ni%u_nj%u_nk%u_tranche%u",
+        home, ni, nj, nk, ni);
+    mkdirp(dirpath);
+
+    snprintf(filename, sizeof(filename),
+        "rng_sequence_f_ni%u_nj%u_nk%u_ioffset%06u.npy", ni, nj, nk, 0);
+    snprintf(fullpath, sizeof(fullpath), "%s/%s", dirpath, filename);
+
+    float* d_out = nullptr;
+    cudaMalloc(&d_out, (size_t)ni * nv * sizeof(float));
+
+    unsigned threads = 256;
+    unsigned blocks = (ni + threads - 1) / threads;
+    generate_sequences<<<blocks, threads>>>(d_out, ni, nv, 0);
+    cudaDeviceSynchronize();
+
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err)); return 1; }
+
+    NP* seq = NP::Make<float>(ni, nj, nk);
+    cudaMemcpy(seq->values<float>(), d_out, (size_t)ni * nv * sizeof(float), cudaMemcpyDeviceToHost);
+    cudaFree(d_out);
+
+    seq->save(fullpath);
+    printf("Saved: %s\n", fullpath);
+    printf("Set OPTICKS_RANDOM_SEQPATH=%s\n", fullpath);
+
+    delete seq;
+    return 0;
+}

From 38472ae913191584c67520ace5c8f1adb164b398 Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sat, 28 Mar 2026 20:30:38 +0000
Subject: [PATCH 06/14] add photon-by-photon aligned comparison script

Compares GPU and G4 photon.npy arrays element-by-element:
flag match, position match, distributions, divergent photon listing.
---
 optiphy/ana/compare_aligned.py | 93 ++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 optiphy/ana/compare_aligned.py

diff --git a/optiphy/ana/compare_aligned.py b/optiphy/ana/compare_aligned.py
new file mode 100644
index 000000000..3ca37149f
--- /dev/null
+++ b/optiphy/ana/compare_aligned.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+"""
+compare_aligned.py - Photon-by-photon comparison of GPU vs G4 aligned simulations.
+
+Usage:
+    python compare_aligned.py <gpu_photon.npy> <g4_photon.npy>
+"""
+import sys
+import numpy as np
+
+def flag_name(f):
+    names = {
+        0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT",
+        0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB",
+        0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT",
+        0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT", 0x8000: "MISS",
+    }
+    return names.get(f, f"0x{f:04x}")
+
+def extract_flag(photon):
+    """Extract flag from q3.x (orient_boundary_flag) - lower 16 bits."""
+    q3 = photon.view(np.uint32).reshape(-1, 4, 4)
+    return q3[:, 3, 0] & 0xFFFF
+
+def main():
+    if len(sys.argv) < 3:
+        print(f"Usage: {sys.argv[0]} <gpu_photon.npy> <g4_photon.npy>")
+        sys.exit(1)
+
+    gpu = np.load(sys.argv[1])
+    g4  = np.load(sys.argv[2])
+
+    print(f"GPU shape: {gpu.shape}")
+    print(f"G4  shape: {g4.shape}")
+
+    n = min(len(gpu), len(g4))
+    gpu = gpu[:n]
+    g4  = g4[:n]
+
+    gpu_flags = extract_flag(gpu)
+    g4_flags  = extract_flag(g4)
+
+    # Flag comparison
+    match = gpu_flags == g4_flags
+    n_match = match.sum()
+    n_diff = n - n_match
+    print(f"\nFlag comparison ({n} photons):")
+    print(f"  Matching: {n_match} ({100*n_match/n:.1f}%)")
+    print(f"  Differ:   {n_diff} ({100*n_diff/n:.1f}%)")
+
+    # Position comparison
+    gpu_pos = gpu[:, 0, :3]  # q0.xyz = position
+    g4_pos  = g4[:, 0, :3]
+
+    pos_diff = np.linalg.norm(gpu_pos - g4_pos, axis=1)
+    zero_g4 = np.all(g4_pos == 0, axis=1)  # G4 photon not recorded (indexed mode gaps)
+
+    valid = ~zero_g4
+    n_valid = valid.sum()
+    print(f"\nPosition comparison ({n_valid} valid G4 photons, {zero_g4.sum()} zero/unrecorded):")
+    if n_valid > 0:
+        vdiff = pos_diff[valid]
+        print(f"  Mean dist:   {vdiff.mean():.4f} mm")
+        print(f"  Max dist:    {vdiff.max():.4f} mm")
+        print(f"  < 0.01 mm:   {(vdiff < 0.01).sum()} ({100*(vdiff < 0.01).sum()/n_valid:.1f}%)")
+        print(f"  < 0.1 mm:    {(vdiff < 0.1).sum()} ({100*(vdiff < 0.1).sum()/n_valid:.1f}%)")
+        print(f"  < 1.0 mm:    {(vdiff < 1.0).sum()} ({100*(vdiff < 1.0).sum()/n_valid:.1f}%)")
+
+    # Flag distribution
+    print(f"\nGPU flag distribution:")
+    for f in sorted(set(gpu_flags)):
+        c = (gpu_flags == f).sum()
+        print(f"  {flag_name(f):20s}: {c:6d} ({100*c/n:.1f}%)")
+
+    print(f"\nG4 flag distribution (aligned):")
+    for f in sorted(set(g4_flags)):
+        c = (g4_flags == f).sum()
+        print(f"  {flag_name(f):20s}: {c:6d} ({100*c/n:.1f}%)")
+
+    # Show first few divergent photons
+    if n_diff > 0:
+        div_idx = np.where(~match)[0]
+        print(f"\nFirst 10 divergent photons:")
+        for i in div_idx[:10]:
+            gf = flag_name(gpu_flags[i])
+            cf = flag_name(g4_flags[i])
+            gp = gpu_pos[i]
+            cp = g4_pos[i]
+            print(f"  [{i:5d}] GPU: {gf:20s} pos=({gp[0]:8.2f},{gp[1]:8.2f},{gp[2]:8.2f})")
+            print(f"          G4:  {cf:20s} pos=({cp[0]:8.2f},{cp[1]:8.2f},{cp[2]:8.2f})")
+
+if __name__ == "__main__":
+    main()

From f7383bffcfe359ac020cae4b906db11e2004c377 Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sat, 28 Mar 2026 22:05:47 +0000
Subject: [PATCH 07/14] add chi2 and glancing-angle analysis to comparison
 script

---
 optiphy/ana/compare_aligned.py | 168 +++++++++++++++++++++++++++------
 1 file changed, 141 insertions(+), 27 deletions(-)

diff --git a/optiphy/ana/compare_aligned.py b/optiphy/ana/compare_aligned.py
index 3ca37149f..b044ce6f2 100644
--- a/optiphy/ana/compare_aligned.py
+++ b/optiphy/ana/compare_aligned.py
@@ -4,24 +4,97 @@
 
 Usage:
     python compare_aligned.py <gpu_photon.npy> <g4_photon.npy>
+
+Performs:
+  1. Per-photon flag comparison (exact match rate)
+  2. Position comparison at multiple thresholds
+  3. Chi-squared test on flag distributions (gold-standard validation metric)
+  4. Glancing-angle photon identification (normal sign ambiguity)
+  5. Divergent photon listing
 """
 import sys
 import numpy as np
 
+FLAG_NAMES = {
+    0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT",
+    0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB",
+    0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT",
+    0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT", 0x8000: "MISS",
+}
+
 def flag_name(f):
-    names = {
-        0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT",
-        0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB",
-        0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT",
-        0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT", 0x8000: "MISS",
-    }
-    return names.get(f, f"0x{f:04x}")
+    return FLAG_NAMES.get(f, f"0x{f:04x}")
 
 def extract_flag(photon):
     """Extract flag from q3.x (orient_boundary_flag) - lower 16 bits."""
     q3 = photon.view(np.uint32).reshape(-1, 4, 4)
     return q3[:, 3, 0] & 0xFFFF
 
+def chi2_flag_distribution(gpu_flags, g4_flags):
+    """
+    Chi-squared comparison of flag distributions.
+
+    Compares the frequency of each flag value between GPU and G4.
+    This is the opticks gold-standard validation metric.
+
+    Returns (chi2, ndof, flags_used, gpu_counts, g4_counts).
+    """
+    all_flags = sorted(set(gpu_flags) | set(g4_flags))
+    gpu_counts = np.array([(gpu_flags == f).sum() for f in all_flags], dtype=float)
+    g4_counts  = np.array([(g4_flags == f).sum() for f in all_flags], dtype=float)
+
+    total = gpu_counts + g4_counts
+    mask = total > 0
+    gpu_c = gpu_counts[mask]
+    g4_c  = g4_counts[mask]
+    tot   = total[mask]
+    flags_used = [f for f, m in zip(all_flags, mask) if m]
+
+    n_gpu = gpu_c.sum()
+    n_g4  = g4_c.sum()
+    expected_gpu = tot * n_gpu / (n_gpu + n_g4)
+    expected_g4  = tot * n_g4  / (n_gpu + n_g4)
+
+    chi2 = 0.0
+    for i in range(len(flags_used)):
+        if expected_gpu[i] > 0:
+            chi2 += (gpu_c[i] - expected_gpu[i])**2 / expected_gpu[i]
+        if expected_g4[i] > 0:
+            chi2 += (g4_c[i] - expected_g4[i])**2 / expected_g4[i]
+
+    ndof = max(len(flags_used) - 1, 1)
+    return chi2, ndof, flags_used, gpu_c, g4_c
+
+def identify_glancing(gpu, g4):
+    """
+    Identify glancing-angle photons where the normal sign ambiguity
+    causes momentum negation between GPU and G4.
+
+    At glancing incidence cos(theta) ~ 0, float32 vs float64 can produce
+    opposite normal signs, reflecting the photon in the opposite direction.
+    These photons have matching flags but very different positions.
+
+    Returns boolean mask of glancing photons.
+    """
+    gpu_mom = gpu[:, 1, :3]
+    g4_mom  = g4[:, 1, :3]
+
+    # Normalize momenta (should already be unit vectors, but be safe)
+    gpu_norm = np.linalg.norm(gpu_mom, axis=1, keepdims=True)
+    g4_norm  = np.linalg.norm(g4_mom, axis=1, keepdims=True)
+    gpu_norm[gpu_norm == 0] = 1
+    g4_norm[g4_norm == 0] = 1
+
+    gpu_hat = gpu_mom / gpu_norm
+    g4_hat  = g4_mom / g4_norm
+
+    # Dot product of momentum directions: -1 = fully negated (normal flip)
+    mom_dot = np.sum(gpu_hat * g4_hat, axis=1)
+
+    # Glancing: momentum vectors are nearly anti-parallel (dot ~ -1)
+    glancing = mom_dot < -0.5
+    return glancing, mom_dot
+
 def main():
     if len(sys.argv) < 3:
         print(f"Usage: {sys.argv[0]} <gpu_photon.npy> <g4_photon.npy>")
@@ -40,24 +113,27 @@ def main():
     gpu_flags = extract_flag(gpu)
     g4_flags  = extract_flag(g4)
 
-    # Flag comparison
+    # ---- 1. Per-photon flag comparison ----
     match = gpu_flags == g4_flags
     n_match = match.sum()
     n_diff = n - n_match
-    print(f"\nFlag comparison ({n} photons):")
+    print(f"\n{'='*60}")
+    print(f"FLAG COMPARISON ({n} photons)")
+    print(f"{'='*60}")
     print(f"  Matching: {n_match} ({100*n_match/n:.1f}%)")
     print(f"  Differ:   {n_diff} ({100*n_diff/n:.1f}%)")
 
-    # Position comparison
-    gpu_pos = gpu[:, 0, :3]  # q0.xyz = position
+    # ---- 2. Position comparison ----
+    gpu_pos = gpu[:, 0, :3]
     g4_pos  = g4[:, 0, :3]
-
     pos_diff = np.linalg.norm(gpu_pos - g4_pos, axis=1)
-    zero_g4 = np.all(g4_pos == 0, axis=1)  # G4 photon not recorded (indexed mode gaps)
+    zero_g4 = np.all(g4_pos == 0, axis=1)
 
     valid = ~zero_g4
     n_valid = valid.sum()
-    print(f"\nPosition comparison ({n_valid} valid G4 photons, {zero_g4.sum()} zero/unrecorded):")
+    print(f"\n{'='*60}")
+    print(f"POSITION COMPARISON ({n_valid} valid, {zero_g4.sum()} unrecorded)")
+    print(f"{'='*60}")
     if n_valid > 0:
         vdiff = pos_diff[valid]
         print(f"  Mean dist:   {vdiff.mean():.4f} mm")
@@ -66,21 +142,59 @@ def main():
         print(f"  < 0.1 mm:    {(vdiff < 0.1).sum()} ({100*(vdiff < 0.1).sum()/n_valid:.1f}%)")
         print(f"  < 1.0 mm:    {(vdiff < 1.0).sum()} ({100*(vdiff < 1.0).sum()/n_valid:.1f}%)")
 
-    # Flag distribution
-    print(f"\nGPU flag distribution:")
-    for f in sorted(set(gpu_flags)):
-        c = (gpu_flags == f).sum()
-        print(f"  {flag_name(f):20s}: {c:6d} ({100*c/n:.1f}%)")
-
-    print(f"\nG4 flag distribution (aligned):")
-    for f in sorted(set(g4_flags)):
-        c = (g4_flags == f).sum()
-        print(f"  {flag_name(f):20s}: {c:6d} ({100*c/n:.1f}%)")
-
-    # Show first few divergent photons
+    # ---- 3. Chi-squared test on flag distributions ----
+    print(f"\n{'='*60}")
+    print(f"CHI-SQUARED TEST (flag distribution)")
+    print(f"{'='*60}")
+
+    chi2_val, ndof, flags_used, gpu_c, g4_c = chi2_flag_distribution(gpu_flags, g4_flags)
+
+    print(f"  {'Flag':<20s} {'GPU':>8s} {'G4':>8s} {'Diff':>8s}")
+    print(f"  {'-'*20} {'-'*8} {'-'*8} {'-'*8}")
+    for i, f in enumerate(flags_used):
+        diff = int(gpu_c[i] - g4_c[i])
+        sign = "+" if diff > 0 else ""
+        print(f"  {flag_name(f):<20s} {int(gpu_c[i]):>8d} {int(g4_c[i]):>8d} {sign}{diff:>7d}")
+
+    deviant_frac = 100 * n_diff / n if n > 0 else 0
+    print(f"\n  chi2/ndof = {chi2_val:.2f}/{ndof} = {chi2_val/ndof:.2f}")
+    print(f"  deviant fraction: {deviant_frac:.2f}% ({n_diff}/{n})")
+
+    # ---- 4. Glancing-angle analysis ----
+    print(f"\n{'='*60}")
+    print(f"GLANCING-ANGLE ANALYSIS (normal sign ambiguity)")
+    print(f"{'='*60}")
+
+    glancing, mom_dot = identify_glancing(gpu, g4)
+    n_glancing = glancing.sum()
+
+    # Among matching-flag photons, how many are glancing with large pos diff?
+    match_glancing = match & glancing
+    match_large_pos = match & (pos_diff > 1.0)
+    match_glancing_large = match & glancing & (pos_diff > 1.0)
+
+    print(f"  Glancing photons (mom dot < -0.5):  {n_glancing}")
+    print(f"  Matching flag + pos diff > 1mm:      {match_large_pos.sum()}")
+    print(f"  Of those, glancing:                  {match_glancing_large.sum()}")
+    if match_large_pos.sum() > 0:
+        frac = 100 * match_glancing_large.sum() / match_large_pos.sum()
+        print(f"  Fraction explained by glancing:      {frac:.0f}%")
+
+    # Position stats excluding glancing photons
+    non_glancing_match = match & ~glancing & valid
+    if non_glancing_match.sum() > 0:
+        ng_diff = pos_diff[non_glancing_match]
+        print(f"\n  Position (matching, non-glancing, {non_glancing_match.sum()} photons):")
+        print(f"    Max dist:  {ng_diff.max():.6f} mm")
+        print(f"    Mean dist: {ng_diff.mean():.6f} mm")
+        print(f"    < 0.01 mm: {(ng_diff < 0.01).sum()} ({100*(ng_diff < 0.01).sum()/non_glancing_match.sum():.1f}%)")
+
+    # ---- 5. Divergent photon listing ----
     if n_diff > 0:
         div_idx = np.where(~match)[0]
-        print(f"\nFirst 10 divergent photons:")
+        print(f"\n{'='*60}")
+        print(f"DIVERGENT PHOTONS (first 10 of {n_diff})")
+        print(f"{'='*60}")
         for i in div_idx[:10]:
             gf = flag_name(gpu_flags[i])
             cf = flag_name(g4_flags[i])

From e11167333a7d5a9646ca6486fbbd50d520ba901a Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Sun, 29 Mar 2026 00:12:08 +0000
Subject: [PATCH 08/14] add G4ValidationGenstep: electron primary for genstep
 validation

Fires configurable electron in LAr, G4 produces scintillation/Cerenkov
photons and tracks them to SiPM detection. For comparison with GPURaytrace
which uses G4CXOpticks to hand gensteps to GPU for optical propagation.

Tested: 1 MeV e- in det.gdml produces ~170 SiPM hits/event.
---
 optiphy/ana/run_genstep_comparison.py | 171 ++++++++++++++++
 src/CMakeLists.txt                    |  14 +-
 src/G4ValidationGenstep.cpp           | 114 +++++++++++
 src/G4ValidationGenstep.h             | 274 ++++++++++++++++++++++++++
 4 files changed, 571 insertions(+), 2 deletions(-)
 create mode 100644 optiphy/ana/run_genstep_comparison.py
 create mode 100644 src/G4ValidationGenstep.cpp
 create mode 100644 src/G4ValidationGenstep.h

diff --git a/optiphy/ana/run_genstep_comparison.py b/optiphy/ana/run_genstep_comparison.py
new file mode 100644
index 000000000..cd87230bb
--- /dev/null
+++ b/optiphy/ana/run_genstep_comparison.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+run_genstep_comparison.py
+==========================
+
+Runs GPU (simg4ox) and G4 (G4ValidationGenstep) simulations with the same
+electron primary, then compares the optical photon hit distributions.
+
+Usage:
+    python run_genstep_comparison.py [--gdml det.gdml] [--energy 1.0] [--nevents 10] [--seed 42]
+"""
+import os
+import sys
+import subprocess
+import argparse
+import numpy as np
+from pathlib import Path
+
+def find_gpu_hits():
+    """Find the most recent GPU hit.npy output."""
+    base = Path(f"/tmp/{os.environ.get('USER','MISSING_USER')}/opticks")
+    candidates = sorted(base.rglob("hit.npy"), key=lambda p: p.stat().st_mtime, reverse=True)
+    return str(candidates[0]) if candidates else None
+
+def run_g4(gdml, energy, nevents, seed, pos, direction):
+    """Run pure G4 simulation with electron primary."""
+    cmd = [
+        "G4ValidationGenstep",
+        "-g", gdml,
+        "-e", str(energy),
+        "-n", str(nevents),
+        "-s", str(seed),
+        "--pos", pos,
+        "--dir", direction,
+    ]
+    print(f"=== Running G4: {' '.join(cmd)}")
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+
+    # Extract hit count from output
+    g4_hits = 0
+    for line in result.stdout.split('\n'):
+        if "Total hits:" in line:
+            g4_hits = int(line.split("Total hits:")[-1].strip())
+
+    print(f"G4: {g4_hits} hits")
+    if result.returncode != 0:
+        print(f"G4 STDERR (last 5 lines):")
+        for line in result.stderr.strip().split('\n')[-5:]:
+            print(f"  {line}")
+    return g4_hits
+
+def run_gpu(gdml, config, macro, seed):
+    """Run GPU simulation via simg4ox."""
+    env = os.environ.copy()
+    env["OPTICKS_INTEGRATION_MODE"] = "1"  # Minimal mode: G4 tracks electron, GPU propagates optical
+
+    cmd = [
+        "simg4ox",
+        "-g", gdml,
+        "-c", config,
+        "-m", macro,
+    ]
+    print(f"\n=== Running GPU: {' '.join(cmd)}")
+    print(f"    OPTICKS_INTEGRATION_MODE=1 (Minimal)")
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+
+    if result.returncode != 0:
+        print(f"GPU STDERR (last 10 lines):")
+        for line in result.stderr.strip().split('\n')[-10:]:
+            print(f"  {line}")
+        return 0
+
+    # Find hit output
+    hit_path = find_gpu_hits()
+    if hit_path and os.path.exists(hit_path):
+        hits = np.load(hit_path)
+        print(f"GPU: {len(hits)} hits (from {hit_path})")
+        return len(hits)
+    else:
+        print("GPU: no hit.npy found")
+        return 0
+
+def compare_hits(g4_path, gpu_path):
+    """Compare G4 and GPU hit arrays."""
+    if not os.path.exists(g4_path):
+        print(f"G4 hits not found: {g4_path}")
+        return
+    if not gpu_path or not os.path.exists(gpu_path):
+        print(f"GPU hits not found")
+        return
+
+    g4 = np.load(g4_path)
+    gpu = np.load(gpu_path)
+
+    print(f"\n{'='*60}")
+    print(f"HIT COMPARISON")
+    print(f"{'='*60}")
+    print(f"  G4 hits:  {len(g4)}")
+    print(f"  GPU hits: {len(gpu)}")
+
+    if len(g4) > 0 and len(gpu) > 0:
+        diff = len(gpu) - len(g4)
+        pct = 100 * diff / len(g4) if len(g4) > 0 else 0
+        sign = "+" if diff > 0 else ""
+        print(f"  Diff:     {sign}{diff} ({sign}{pct:.1f}%)")
+
+    # Position distributions
+    if len(g4) > 0:
+        g4_pos = g4[:, 0, :3]
+        print(f"\n  G4 hit positions:")
+        print(f"    x: [{g4_pos[:,0].min():.1f}, {g4_pos[:,0].max():.1f}] mm")
+        print(f"    y: [{g4_pos[:,1].min():.1f}, {g4_pos[:,1].max():.1f}] mm")
+        print(f"    z: [{g4_pos[:,2].min():.1f}, {g4_pos[:,2].max():.1f}] mm")
+
+    if len(gpu) > 0:
+        gpu_pos = gpu[:, 0, :3]
+        print(f"\n  GPU hit positions:")
+        print(f"    x: [{gpu_pos[:,0].min():.1f}, {gpu_pos[:,0].max():.1f}] mm")
+        print(f"    y: [{gpu_pos[:,1].min():.1f}, {gpu_pos[:,1].max():.1f}] mm")
+        print(f"    z: [{gpu_pos[:,2].min():.1f}, {gpu_pos[:,2].max():.1f}] mm")
+
+    # Wavelength distributions
+    if len(g4) > 0:
+        g4_wl = g4[:, 2, 3]
+        print(f"\n  G4 wavelength:  mean={g4_wl.mean():.1f} std={g4_wl.std():.1f} nm")
+    if len(gpu) > 0:
+        gpu_wl = gpu[:, 2, 3]
+        print(f"  GPU wavelength: mean={gpu_wl.mean():.1f} std={gpu_wl.std():.1f} nm")
+
+    # Time distributions
+    if len(g4) > 0:
+        g4_t = g4[:, 0, 3]
+        print(f"\n  G4 time:  mean={g4_t.mean():.2f} max={g4_t.max():.2f} ns")
+    if len(gpu) > 0:
+        gpu_t = gpu[:, 0, 3]
+        print(f"  GPU time: mean={gpu_t.mean():.2f} max={gpu_t.max():.2f} ns")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Compare GPU vs G4 electron genstep simulation")
+    parser.add_argument("--gdml", default="det.gdml", help="GDML geometry file")
+    parser.add_argument("--energy", type=float, default=1.0, help="Electron energy in MeV")
+    parser.add_argument("--nevents", type=int, default=10, help="Number of events")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed")
+    parser.add_argument("--pos", default="0,0,100", help="Electron position x,y,z mm")
+    parser.add_argument("--dir", default="0,0,1", help="Electron direction x,y,z")
+    args = parser.parse_args()
+
+    # Run G4
+    g4_hits = run_g4(args.gdml, args.energy, args.nevents, args.seed, args.pos, args.dir)
+
+    # Compare
+    g4_path = "g4_genstep_hits.npy"
+    gpu_path = find_gpu_hits()
+
+    if os.path.exists(g4_path):
+        g4 = np.load(g4_path)
+        print(f"\n{'='*60}")
+        print(f"G4 RESULTS ({args.nevents} events, {args.energy} MeV electron)")
+        print(f"{'='*60}")
+        print(f"  Total hits: {len(g4)}")
+        print(f"  Hits/event: {len(g4)/args.nevents:.1f}")
+        if len(g4) > 0:
+            g4_wl = g4[:, 2, 3]
+            g4_pos = g4[:, 0, :3]
+            print(f"  Wavelength: mean={g4_wl.mean():.1f} nm")
+            print(f"  Hit y range: [{g4_pos[:,1].min():.1f}, {g4_pos[:,1].max():.1f}] mm")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 99c3aba0b..996a91cb3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -78,13 +78,23 @@ target_include_directories(GPUPhotonFileSource PRIVATE
 )
 
 # StandAloneGeant4Validation - pure G4 optical photon simulation (no opticks GPU)
+# Links U4 for aligned mode (U4Random, InstrumentedG4OpBoundaryProcess, ShimG4Op*)
 add_executable(StandAloneGeant4Validation StandAloneGeant4Validation.cpp StandAloneGeant4Validation.h)
-target_link_libraries(StandAloneGeant4Validation gphox gphox_g4_deps)
+target_link_libraries(StandAloneGeant4Validation gphox gphox_g4_deps U4)
+target_compile_definitions(StandAloneGeant4Validation PRIVATE WITH_INSTRUMENTED_DEBUG)
 target_include_directories(StandAloneGeant4Validation PRIVATE
     $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include>
     $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
 )
 
+# G4ValidationGenstep - pure G4 electron→scintillation/Cerenkov→optical photon simulation
+add_executable(G4ValidationGenstep G4ValidationGenstep.cpp G4ValidationGenstep.h)
+target_link_libraries(G4ValidationGenstep gphox gphox_g4_deps)
+target_include_directories(G4ValidationGenstep PRIVATE
+    $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include>
+    $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
+)
+
 # simtox creates a numpy file with initial photons for simulation
 add_executable(simtox simtox.cpp)
 
@@ -95,7 +105,7 @@ target_include_directories(simtox PRIVATE
     $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
 )
 
-install(TARGETS consgeo simg4ox GPUCerenkov GPURaytrace GPUPhotonSource GPUPhotonSourceMinimal GPUPhotonFileSource StandAloneGeant4Validation simtox gphox gphox_g4_deps
+install(TARGETS consgeo simg4ox GPUCerenkov GPURaytrace GPUPhotonSource GPUPhotonSourceMinimal GPUPhotonFileSource StandAloneGeant4Validation G4ValidationGenstep simtox gphox gphox_g4_deps
     EXPORT ${PROJECT_NAME}Targets
     RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
     LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
diff --git a/src/G4ValidationGenstep.cpp b/src/G4ValidationGenstep.cpp
new file mode 100644
index 000000000..f27597822
--- /dev/null
+++ b/src/G4ValidationGenstep.cpp
@@ -0,0 +1,114 @@
+#include <string>
+
+#include <argparse/argparse.hpp>
+
+#include "FTFP_BERT.hh"
+#include "G4OpticalPhysics.hh"
+#include "G4RunManager.hh"
+#include "G4VModularPhysicsList.hh"
+#include "G4UImanager.hh"
+
+#include "G4ValidationGenstep.h"
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+    argparse::ArgumentParser program("G4ValidationGenstep", "0.0.0");
+
+    string gdml_file;
+    double energy_MeV = 1.0;
+    int num_events = 1;
+
+    program.add_argument("-g", "--gdml")
+        .help("path to GDML file")
+        .default_value(string("det.gdml"))
+        .nargs(1)
+        .store_into(gdml_file);
+
+    program.add_argument("-e", "--energy")
+        .help("electron kinetic energy in MeV")
+        .default_value(1.0)
+        .scan<'g', double>()
+        .store_into(energy_MeV);
+
+    program.add_argument("-n", "--nevents")
+        .help("number of events")
+        .default_value(1)
+        .scan<'i', int>()
+        .store_into(num_events);
+
+    program.add_argument("-s", "--seed")
+        .help("random seed")
+        .scan<'i', long>();
+
+    program.add_argument("--pos")
+        .help("electron position x,y,z in mm (comma-separated)")
+        .default_value(string("0,0,0"));
+
+    program.add_argument("--dir")
+        .help("electron direction x,y,z (comma-separated)")
+        .default_value(string("0,0,1"));
+
+    try
+    {
+        program.parse_args(argc, argv);
+    }
+    catch (const exception &err)
+    {
+        cerr << err.what() << endl;
+        cerr << program;
+        exit(EXIT_FAILURE);
+    }
+
+    long seed;
+    if (program.is_used("--seed"))
+        seed = program.get<long>("--seed");
+    else
+        seed = static_cast<long>(time(nullptr));
+
+    // Parse position
+    G4ThreeVector pos(0, 0, 0);
+    {
+        string s = program.get<string>("--pos");
+        float x, y, z;
+        if (sscanf(s.c_str(), "%f,%f,%f", &x, &y, &z) == 3)
+            pos = G4ThreeVector(x, y, z);
+    }
+
+    // Parse direction
+    G4ThreeVector dir(0, 0, 1);
+    {
+        string s = program.get<string>("--dir");
+        float x, y, z;
+        if (sscanf(s.c_str(), "%f,%f,%f", &x, &y, &z) == 3)
+            dir = G4ThreeVector(x, y, z);
+    }
+
+    G4cout << "G4ValidationGenstep:" << G4endl;
+    G4cout << "  GDML:     " << gdml_file << G4endl;
+    G4cout << "  Energy:   " << energy_MeV << " MeV" << G4endl;
+    G4cout << "  Events:   " << num_events << G4endl;
+    G4cout << "  Position: (" << pos.x() << "," << pos.y() << "," << pos.z() << ") mm" << G4endl;
+    G4cout << "  Direction: (" << dir.x() << "," << dir.y() << "," << dir.z() << ")" << G4endl;
+    G4cout << "  Seed:     " << seed << G4endl;
+
+    GenstepHitAccumulator accumulator;
+
+    G4VModularPhysicsList *physics = new FTFP_BERT;
+    physics->RegisterPhysics(new G4OpticalPhysics);
+
+    G4RunManager run_mgr;
+    run_mgr.SetUserInitialization(physics);
+    run_mgr.SetUserInitialization(new GenstepDetectorConstruction(gdml_file, &accumulator));
+    run_mgr.SetUserInitialization(
+        new GenstepActionInitialization(&accumulator, pos, dir, energy_MeV, num_events));
+    run_mgr.Initialize();
+
+    CLHEP::HepRandom::setTheSeed(seed);
+
+    G4cout << "G4Genstep: Starting " << num_events << " events..." << G4endl;
+    run_mgr.BeamOn(num_events);
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/G4ValidationGenstep.h b/src/G4ValidationGenstep.h
new file mode 100644
index 000000000..56ec85bf9
--- /dev/null
+++ b/src/G4ValidationGenstep.h
@@ -0,0 +1,274 @@
+#pragma once
+/**
+G4ValidationGenstep.h
+======================
+
+Pure G4 simulation with electron primary that produces scintillation/Cerenkov
+optical photons. G4 handles all physics including optical photon propagation.
+Collects hits via sensitive detector. Used as the CPU reference for comparison
+with GPU (simg4ox) genstep-based optical simulation.
+**/
+
+#include <filesystem>
+#include <mutex>
+#include <vector>
+
+#include "G4Event.hh"
+#include "G4GDMLParser.hh"
+#include "G4THitsCollection.hh"
+#include "G4VHit.hh"
+#include "G4OpticalPhoton.hh"
+#include "G4Electron.hh"
+#include "G4PhysicalConstants.hh"
+#include "G4PrimaryParticle.hh"
+#include "G4PrimaryVertex.hh"
+#include "G4Run.hh"
+#include "G4SDManager.hh"
+#include "G4SystemOfUnits.hh"
+#include "G4ThreeVector.hh"
+#include "G4Track.hh"
+#include "G4TrackStatus.hh"
+#include "G4UserEventAction.hh"
+#include "G4UserRunAction.hh"
+#include "G4VPhysicalVolume.hh"
+#include "G4VUserActionInitialization.hh"
+#include "G4VUserDetectorConstruction.hh"
+#include "G4VUserPrimaryGeneratorAction.hh"
+
+#include "sysrap/NP.hh"
+#include "sysrap/sphoton.h"
+
+// ---- Hit accumulator ----
+
+struct GenstepHitAccumulator
+{
+    std::mutex mtx;
+    std::vector<sphoton> hits;
+    int total_optical_photons = 0;
+    int total_scintillation = 0;
+    int total_cerenkov = 0;
+
+    void AddHits(const std::vector<sphoton> &event_hits)
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        hits.insert(hits.end(), event_hits.begin(), event_hits.end());
+    }
+
+    void Save(const char *filename)
+    {
+        std::lock_guard<std::mutex> lock(mtx);
+        G4int num_hits = hits.size();
+        NP *arr = NP::Make<float>(num_hits, 4, 4);
+        for (int i = 0; i < num_hits; i++)
+        {
+            float *data = reinterpret_cast<float *>(&hits[i]);
+            std::copy(data, data + 16, arr->values<float>() + i * 16);
+        }
+        arr->save(filename);
+        delete arr;
+        G4cout << "G4Genstep: Saved " << num_hits << " hits to " << filename << G4endl;
+    }
+};
+
+// ---- Sensitive Detector ----
+
+struct GenstepPhotonHit : public G4VHit
+{
+    GenstepPhotonHit() = default;
+
+    GenstepPhotonHit(G4double energy, G4double time, G4ThreeVector position,
+                     G4ThreeVector direction, G4ThreeVector polarization)
+        : photon()
+    {
+        photon.pos = {static_cast<float>(position.x()),
+                      static_cast<float>(position.y()),
+                      static_cast<float>(position.z())};
+        photon.time = time;
+        photon.mom = {static_cast<float>(direction.x()),
+                      static_cast<float>(direction.y()),
+                      static_cast<float>(direction.z())};
+        photon.pol = {static_cast<float>(polarization.x()),
+                      static_cast<float>(polarization.y()),
+                      static_cast<float>(polarization.z())};
+        photon.wavelength = h_Planck * c_light / (energy * CLHEP::eV);
+    }
+
+    void Print() override { G4cout << photon << G4endl; }
+    sphoton photon;
+};
+
+using GenstepPhotonHitsCollection = G4THitsCollection<GenstepPhotonHit>;
+
+struct GenstepPhotonSD : public G4VSensitiveDetector
+{
+    GenstepHitAccumulator *accumulator;
+
+    GenstepPhotonSD(G4String name, GenstepHitAccumulator *acc)
+        : G4VSensitiveDetector(name), accumulator(acc)
+    {
+        collectionName.insert(name + "_HC");
+    }
+
+    void Initialize(G4HCofThisEvent *hce) override
+    {
+        fHC = new GenstepPhotonHitsCollection(SensitiveDetectorName, collectionName[0]);
+        if (fHCID < 0)
+            fHCID = G4SDManager::GetSDMpointer()->GetCollectionID(collectionName[0]);
+        hce->AddHitsCollection(fHCID, fHC);
+    }
+
+    G4bool ProcessHits(G4Step *aStep, G4TouchableHistory *) override
+    {
+        G4Track *track = aStep->GetTrack();
+        if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition())
+            return false;
+
+        fHC->insert(new GenstepPhotonHit(
+            track->GetTotalEnergy(),
+            track->GetGlobalTime(),
+            aStep->GetPostStepPoint()->GetPosition(),
+            aStep->GetPostStepPoint()->GetMomentumDirection(),
+            aStep->GetPostStepPoint()->GetPolarization()));
+
+        track->SetTrackStatus(fStopAndKill);
+        return true;
+    }
+
+    void EndOfEvent(G4HCofThisEvent *) override
+    {
+        G4int n = fHC->entries();
+        std::vector<sphoton> event_hits;
+        event_hits.reserve(n);
+        for (GenstepPhotonHit *hit : *fHC->GetVector())
+            event_hits.push_back(hit->photon);
+        accumulator->AddHits(event_hits);
+    }
+
+  private:
+    GenstepPhotonHitsCollection *fHC = nullptr;
+    G4int fHCID = -1;
+};
+
+// ---- Detector Construction ----
+
+struct GenstepDetectorConstruction : G4VUserDetectorConstruction
+{
+    GenstepDetectorConstruction(std::filesystem::path gdml_file, GenstepHitAccumulator *acc)
+        : gdml_file_(gdml_file), accumulator_(acc) {}
+
+    G4VPhysicalVolume *Construct() override
+    {
+        parser_.Read(gdml_file_.string(), false);
+        return parser_.GetWorldVolume();
+    }
+
+    void ConstructSDandField() override
+    {
+        G4SDManager *SDman = G4SDManager::GetSDMpointer();
+        const G4GDMLAuxMapType *auxmap = parser_.GetAuxMap();
+
+        for (auto const &[logVol, listType] : *auxmap)
+        {
+            for (auto const &auxtype : listType)
+            {
+                if (auxtype.type == "SensDet")
+                {
+                    G4String name = logVol->GetName() + "_" + auxtype.value;
+                    G4cout << "G4Genstep: Attaching SD to " << logVol->GetName() << G4endl;
+                    GenstepPhotonSD *sd = new GenstepPhotonSD(name, accumulator_);
+                    SDman->AddNewDetector(sd);
+                    logVol->SetSensitiveDetector(sd);
+                }
+            }
+        }
+    }
+
+  private:
+    std::filesystem::path gdml_file_;
+    G4GDMLParser parser_;
+    GenstepHitAccumulator *accumulator_;
+};
+
+// ---- Electron Primary Generator ----
+
+struct ElectronPrimaryGenerator : G4VUserPrimaryGeneratorAction
+{
+    G4ThreeVector position;
+    G4ThreeVector direction;
+    G4double energy_MeV;
+
+    ElectronPrimaryGenerator(G4ThreeVector pos, G4ThreeVector dir, G4double energy)
+        : position(pos), direction(dir.unit()), energy_MeV(energy) {}
+
+    void GeneratePrimaries(G4Event *event) override
+    {
+        G4PrimaryVertex *vertex = new G4PrimaryVertex(position, 0.0);
+        G4PrimaryParticle *particle = new G4PrimaryParticle(G4Electron::Definition());
+        particle->SetKineticEnergy(energy_MeV * MeV);
+        particle->SetMomentumDirection(direction);
+        vertex->SetPrimary(particle);
+        event->AddPrimaryVertex(vertex);
+    }
+};
+
+// ---- Event Action with optical photon counting ----
+
+struct GenstepEventAction : G4UserEventAction
+{
+    GenstepHitAccumulator *accumulator;
+    int total_events;
+
+    GenstepEventAction(GenstepHitAccumulator *acc, int total)
+        : accumulator(acc), total_events(total) {}
+
+    void EndOfEventAction(const G4Event *event) override
+    {
+        int id = event->GetEventID();
+        if (id == 0 || (id + 1) % 10 == 0 || id + 1 == total_events)
+            G4cout << "G4Genstep: Event " << id + 1 << "/" << total_events << G4endl;
+    }
+};
+
+// ---- Run Action ----
+
+struct GenstepRunAction : G4UserRunAction
+{
+    GenstepHitAccumulator *accumulator;
+
+    GenstepRunAction(GenstepHitAccumulator *acc) : accumulator(acc) {}
+
+    void EndOfRunAction(const G4Run *) override
+    {
+        G4cout << "G4Genstep: Total hits: " << accumulator->hits.size() << G4endl;
+        accumulator->Save("g4_genstep_hits.npy");
+    }
+};
+
+// ---- Action Initialization ----
+
+struct GenstepActionInitialization : G4VUserActionInitialization
+{
+    GenstepHitAccumulator *accumulator;
+    G4ThreeVector position;
+    G4ThreeVector direction;
+    G4double energy_MeV;
+    int num_events;
+
+    GenstepActionInitialization(GenstepHitAccumulator *acc,
+                                G4ThreeVector pos, G4ThreeVector dir,
+                                G4double energy, int nevt)
+        : accumulator(acc), position(pos), direction(dir),
+          energy_MeV(energy), num_events(nevt) {}
+
+    void BuildForMaster() const override
+    {
+        SetUserAction(new GenstepRunAction(accumulator));
+    }
+
+    void Build() const override
+    {
+        SetUserAction(new ElectronPrimaryGenerator(position, direction, energy_MeV));
+        SetUserAction(new GenstepEventAction(accumulator, num_events));
+        SetUserAction(new GenstepRunAction(accumulator));
+    }
+};

From c241640e4b8ffd1b8c0bfbe9e97890965331999c Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Thu, 2 Apr 2026 14:57:28 +0000
Subject: [PATCH 09/14] allow OPTICKS_MAX_RECORD env var to override DebugLite
 default

Relax MaxRecord assert to permit values above sseq::SLOTS (32).
In DebugLite mode, respect OPTICKS_MAX_RECORD env var if set,
falling back to the default record_limit (32) otherwise. This
enables full step history recording (e.g. 1000 steps) for photon
path analysis without affecting production performance.
---
 sysrap/SEventConfig.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sysrap/SEventConfig.cc b/sysrap/SEventConfig.cc
index a33fbf489..e978b1a4d 100644
--- a/sysrap/SEventConfig.cc
+++ b/sysrap/SEventConfig.cc
@@ -778,7 +778,7 @@ void SEventConfig::LIMIT_Check()
    //assert( _MaxBounce >= 0 && _MaxBounce <  LIMIT ) ;
    // MaxBounce should not in principal be limited
 
-   assert( _MaxRecord >= 0 && _MaxRecord <= RecordLimit() ) ;
+   assert( _MaxRecord >= 0 ) ;  // RecordLimit relaxed to allow large record arrays for step analysis
    assert( _MaxRec    >= 0 && _MaxRec    <= RecordLimit() ) ;
    assert( _MaxPrd    >= 0 && _MaxPrd    <= RecordLimit() ) ;
 
@@ -1596,7 +1596,8 @@ void SEventConfig::Initialize_Comp_Simulate_(unsigned& gather_mask, unsigned& sa
         else if(IsDebugLite())
         {
             SEventConfig::SetMaxRec(0);
-            SEventConfig::SetMaxRecord(record_limit);
+            int env_max_record = ssys::getenvint(kMaxRecord, 0);
+            SEventConfig::SetMaxRecord(env_max_record > 0 ? env_max_record : record_limit);
             SEventConfig::SetMaxSeq(1);  // formerly incorrectly set to max_bounce+1
         }
 

From a077692fea87cdb111764fa9718bcd198f1460bf Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Thu, 2 Apr 2026 02:45:27 +0000
Subject: [PATCH 10/14] fix WLS time profile: use exponential instead of
 default delta

G4OpticalPhysics defaults to delta time profile for G4OpWLS,
which applies a fixed delay equal to WLSTIMECONSTANT. The
physically correct model is exponential decay sampling:
dt = -WLSTIMECONSTANT * log(u). The GPU implementation already
uses exponential. This fix aligns G4 with GPU and with the
correct stochastic decay physics.
---
 src/GPURaytrace.cpp                | 2 ++
 src/StandAloneGeant4Validation.cpp | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/src/GPURaytrace.cpp b/src/GPURaytrace.cpp
index c97c7ec11..28b021524 100644
--- a/src/GPURaytrace.cpp
+++ b/src/GPURaytrace.cpp
@@ -4,6 +4,7 @@
 
 #include "FTFP_BERT.hh"
 #include "G4OpticalPhysics.hh"
+#include "G4OpticalParameters.hh"
 #include "G4VModularPhysicsList.hh"
 
 #include "G4UIExecutive.hh"
@@ -102,6 +103,7 @@ int main(int argc, char **argv)
     // The physics list must be instantiated before other user actions
     G4VModularPhysicsList *physics = new FTFP_BERT;
     physics->RegisterPhysics(new G4OpticalPhysics);
+    G4OpticalParameters::Instance()->SetWLSTimeProfile("exponential");
 
     auto *run_mgr = G4RunManagerFactory::CreateRunManager();
     run_mgr->SetUserInitialization(physics);
diff --git a/src/StandAloneGeant4Validation.cpp b/src/StandAloneGeant4Validation.cpp
index bd90a3d24..0885a7d58 100644
--- a/src/StandAloneGeant4Validation.cpp
+++ b/src/StandAloneGeant4Validation.cpp
@@ -10,6 +10,8 @@
 #include "G4VModularPhysicsList.hh"
 #include "G4UImanager.hh"
 
+#include "G4OpticalParameters.hh"
+
 #include "StandAloneGeant4Validation.h"
 #include "config.h"
 
@@ -119,6 +121,9 @@ int main(int argc, char **argv)
     else
         physics->RegisterPhysics(new G4OpticalPhysics);
 
+    // Use exponential WLS time profile (default is delta = zero delay)
+    G4OpticalParameters::Instance()->SetWLSTimeProfile("exponential");
+
     if (use_mt)
     {
         auto *run_mgr = new G4MTRunManager;

From ecdf0085593111df3baaf6e43c380cb635865597 Mon Sep 17 00:00:00 2001
From: Gabor Galgoczi <gaborgalgoczi@gmail.com>
Date: Thu, 2 Apr 2026 13:17:48 +0000
Subject: [PATCH 11/14] revert WLS time profile from GPURaytrace

SetWLSTimeProfile in GPURaytrace changed G4OpticalParameters
during physics list initialization, inadvertently altering the
electron tracking (different genstep count). The WLS time profile
setting only matters for StandAloneGeant4Validation where G4
tracks optical photons directly. In GPURaytrace, the GPU handles
optical physics so the G4 time profile is irrelevant.
---
 src/GPURaytrace.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/GPURaytrace.cpp b/src/GPURaytrace.cpp
index 28b021524..c97c7ec11 100644
--- a/src/GPURaytrace.cpp
+++ b/src/GPURaytrace.cpp
@@ -4,7 +4,6 @@
 
 #include "FTFP_BERT.hh"
 #include "G4OpticalPhysics.hh"
-#include "G4OpticalParameters.hh"
 #include "G4VModularPhysicsList.hh"
 
 #include "G4UIExecutive.hh"
@@ -103,7 +102,6 @@ int main(int argc, char **argv)
     // The physics list must be instantiated before other user actions
     G4VModularPhysicsList *physics = new FTFP_BERT;
     physics->RegisterPhysics(new G4OpticalPhysics);
-    G4OpticalParameters::Instance()->SetWLSTimeProfile("exponential");
 
     auto *run_mgr = G4RunManagerFactory::CreateRunManager();
     run_mgr->SetUserInitialization(physics);

From 965806916a91737054e51c88738a7726e52a0656 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov <dmixsmi@gmail.com>
Date: Thu, 26 Mar 2026 18:29:45 -0400
Subject: [PATCH 12/14] fix(geant4): use public G4MultiUnion navigation API

Update `G4MultiUnion` navigation to use the public Geant4 API and finalize generated multi-unions
with `Voxelize()`.

- remove the `G4MultiUnion`-specific `NoVoxels` path from `sysrap/ssolid.h`
- use the standard `G4VSolid` navigation calls:
  - `Inside`
  - `DistanceToIn`
  - `DistanceToOut`
- enable `Voxelize()` in `U4SolidMaker::GridMultiUnion_`

Geant4 11.4.1 no longer permits external use of `G4MultiUnion::InsideNoVoxels`, which breaks the
build.

Rather than keeping a version-specific workaround, this change aligns with the public Geant4
navigation interface and ensures `G4MultiUnion` instances are finalized for voxel-aware navigation.

This change intentionally removes an older `G4MultiUnion` special case that used Geant4's
`*NoVoxels` navigation methods.

Relevant history in this repo:

- `61b075287` `check G4MultiUnion BoxGrid and OrbGrid` This appears to be the origin of the pattern.
  It introduced a `DistanceMultiUnionNoVoxels_` helper using `InsideNoVoxels`,
  `DistanceToInNoVoxels`, and `DistanceToOutNoVoxels`. In the same change, `GridMultiUnion_` was
  added with `Voxelize()` commented out, which suggests the no-voxel path was deliberately paired
  with partially constructed `G4MultiUnion` test geometry.

- `9bfe9c94f` `pull x4solid.h Distance functions out of X4Intersect for usage from X4Simtrace` This
  propagated the same `G4MultiUnion` no-voxel logic into a reusable utility layer.

- `e614494e3` `generalize Geant4 volume/solid intersect plotting to any level of transforms using
  U4Tree/stree in u4/tests/U4PMTFastSimGeomTest` This copied the same special case into
  `sysrap/ssolid.h`, which is the direct ancestor of the code changed in this PR.

There is also later related context in:

- `079896e04` `enable geometry translation ... to avoid G4 voxelization SEGV` This indicates there
  were broader concerns in the codebase around Geant4 voxelization stability for some geometry
  flows.

Taken together, the historical pattern suggests the `NoVoxels` path was introduced as a pragmatic
workaround for `G4MultiUnion` behavior in older testing and translation workflows, especially when
`Voxelize()` was not always being called.

This PR moves back to the public Geant4 navigation API and restores the expected `Voxelize()` step
for generated `G4MultiUnion` grid solids, which is required for the standard navigation path and
    also avoids reliance on private/internal Geant4 APIs in 11.4.x.
---
 sysrap/ssolid.h    | 22 +---------------------
 u4/U4SolidMaker.cc |  6 ++----
 2 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/sysrap/ssolid.h b/sysrap/ssolid.h
index dd1a2f901..e63364c31 100644
--- a/sysrap/ssolid.h
+++ b/sysrap/ssolid.h
@@ -15,7 +15,6 @@ ssolid.h
 #include "sgeomdefs.h"
 
 #include "G4VSolid.hh"
-#include "G4MultiUnion.hh"
 #include "G4ThreeVector.hh"
 
 struct ssolid
@@ -25,9 +24,6 @@ struct ssolid
     static G4double Distance_(const G4VSolid* solid, const G4ThreeVector& pos, const G4ThreeVector& dir, EInside& in, 
              G4ThreeVector* isect=nullptr  
          ); 
-    static G4double DistanceMultiUnionNoVoxels_(
-                          const G4MultiUnion* solid, const G4ThreeVector& pos, const G4ThreeVector& dir, EInside& in ); 
-
     static G4double Distance(const G4VSolid* solid, const G4ThreeVector& pos, const G4ThreeVector& dir, bool dump ); 
     static void Simtrace( quad4& p, const G4VSolid* solid, bool dump=false); 
 }; 
@@ -86,26 +82,10 @@ inline G4double ssolid::Distance_(const G4VSolid* solid, const G4ThreeVector& po
 
 
 
-inline G4double ssolid::DistanceMultiUnionNoVoxels_(const G4MultiUnion* solid, const G4ThreeVector& pos, const G4ThreeVector& dir, EInside& in ) // static
-{
-    in =  solid->InsideNoVoxels(pos) ; 
-    G4double t = kInfinity ; 
-    switch( in )
-    {
-        case kInside:  t = solid->DistanceToOutNoVoxels( pos, dir, nullptr ) ; break ; 
-        case kSurface: t = solid->DistanceToOutNoVoxels( pos, dir, nullptr ) ; break ; 
-        case kOutside: t = solid->DistanceToInNoVoxels(  pos, dir ) ; break ; 
-        default:  assert(0) ; 
-    }
-    return t ; 
-}
-
-
 inline G4double ssolid::Distance(const G4VSolid* solid, const G4ThreeVector& pos, const G4ThreeVector& dir, bool dump ) // static
 {
     EInside in ; 
-    const G4MultiUnion* m = dynamic_cast<const G4MultiUnion*>(solid) ; 
-    G4double t = m ? DistanceMultiUnionNoVoxels_(m, pos, dir, in ) : Distance_( solid, pos, dir, in  );  
+    G4double t = Distance_( solid, pos, dir, in );
 
     if(dump && t != kInfinity)
     {
diff --git a/u4/U4SolidMaker.cc b/u4/U4SolidMaker.cc
index 6fcf079dc..a07a64c4e 100644
--- a/u4/U4SolidMaker.cc
+++ b/u4/U4SolidMaker.cc
@@ -1617,7 +1617,8 @@ const G4VSolid* U4SolidMaker::GridMultiUnion_(const char* name, G4VSolid* item,
         grid->AddNode(*item, tr);
 
     }
-    //grid->Voxelize();
+    // The Geant4 header in G4MultiUnion.hh explicitly says Voxelize() must be called once before navigation use.
+    grid->Voxelize();
     return grid ;
 }
 
@@ -3373,6 +3374,3 @@ const G4VSolid* U4SolidMaker::LProfileSectorPolycone( const char* )
 {
     return EMFCoil::Example(16);
 }
-
-
-

From da349bba8711b706e78919cf368aa662179c9854 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov <dmixsmi@gmail.com>
Date: Mon, 30 Mar 2026 18:20:56 -0400
Subject: [PATCH 13/14] fix(u4): ensure U4SolidMaker voxelizes G4MultiUnion
 before return

---
 u4/U4SolidMaker.cc | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/u4/U4SolidMaker.cc b/u4/U4SolidMaker.cc
index a07a64c4e..456b066c0 100644
--- a/u4/U4SolidMaker.cc
+++ b/u4/U4SolidMaker.cc
@@ -28,6 +28,19 @@ using CLHEP::mm ;
 #include "U4SolidTree.hh"
 #include "SLOG.hh"
 
+namespace
+{
+    inline const G4VSolid* EnsureVoxelizedMultiUnion(const G4VSolid* solid)
+    {
+        const auto* mu = dynamic_cast<const G4MultiUnion*>(solid);
+        if(mu && mu->GetNumberOfSolids() > 0 && mu->GetVoxels().GetCountOfVoxels() == 0)
+        {
+            const_cast<G4MultiUnion*>(mu)->Voxelize();
+        }
+        return solid ;
+    }
+}
+
 const plog::Severity U4SolidMaker::LEVEL = SLOG::EnvLevel("U4SolidMaker", "DEBUG");
 
 const char* U4SolidMaker::NAMES = R"LITERAL(
@@ -187,7 +200,7 @@ const G4VSolid* U4SolidMaker::Make(const char* qname, std::string& meta )  // st
     else if(StartsWith("LProfileSectorPolycone",qname))               solid = LProfileSectorPolycone(qname);
     LOG(LEVEL) << " qname " << qname << " solid " << solid ;
     LOG_IF(error, solid==nullptr) << " Failed to create solid for qname " << qname << " CHECK U4SolidMaker::Make " ;
-    return solid ;
+    return EnsureVoxelizedMultiUnion(solid) ;
 }
 
 

From a721c05a090b85e11830135f6c161dc550b10c3f Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov <dmixsmi@gmail.com>
Date: Mon, 30 Mar 2026 19:41:08 -0400
Subject: [PATCH 14/14] test(u4): add G4MultiUnion voxelized navigation tests

Add U4SolidMakerTest2 to validate G4MultiUnion distance and inside calculations through the standard
voxelized Geant4 navigation API, exercising the code path changed by the removal of the NoVoxels
special case in ssolid.h.

Test OrbOrbMultiUnion1 (3 orbs of radius 50mm at x=-100,0,+100):
- DistanceToIn from outside along Z and X axes into each orb
- DistanceToOut from inside centre and right orbs
- ray entering the leftmost orb along the +X axis
- miss for a ray parallel above all orbs
- Inside() at origin, orb centres, surface, and interior points

Test BoxFourBoxContiguous (45mm cube + 4 flanking boxes):
- DistanceToIn from outside along Z into the centre cube
- DistanceToOut from origin along +Z
- DistanceToIn hitting a flanking +X box from outside
- miss for a far-offset parallel ray
- Inside() at origin, flanking box interior, and far exterior

Test EnsureVoxelizedMultiUnion safety net:
- confirm OrbOrbMultiUnion (no explicit Voxelize() call) navigates correctly through Make(), which
  calls EnsureVoxelizedMultiUnion
- verify Inside() and Distance_ return expected values at the origin
---
 u4/tests/CMakeLists.txt       |   1 +
 u4/tests/U4SolidMakerTest2.cc | 307 ++++++++++++++++++++++++++++++++++
 2 files changed, 308 insertions(+)
 create mode 100644 u4/tests/U4SolidMakerTest2.cc

diff --git a/u4/tests/CMakeLists.txt b/u4/tests/CMakeLists.txt
index 9e6700efe..586bee34e 100644
--- a/u4/tests/CMakeLists.txt
+++ b/u4/tests/CMakeLists.txt
@@ -55,6 +55,7 @@ set(TEST_SOURCES
    U4SurfaceTest.cc
    U4SolidTest.cc
    U4SolidMakerTest.cc
+   U4SolidMakerTest2.cc
 
    U4SensitiveDetectorTest.cc 
 
diff --git a/u4/tests/U4SolidMakerTest2.cc b/u4/tests/U4SolidMakerTest2.cc
new file mode 100644
index 000000000..de5a1da99
--- /dev/null
+++ b/u4/tests/U4SolidMakerTest2.cc
@@ -0,0 +1,307 @@
+/**
+U4SolidMakerTest2.cc
+======================
+
+Test voxelized G4MultiUnion navigation via the public Geant4 API.
+
+Validates that ssolid::Distance (which now always uses the standard
+Inside/DistanceToIn/DistanceToOut path) returns correct results for
+G4MultiUnion solids produced by U4SolidMaker.
+
+Exercises:
+  - rays that clearly hit a constituent solid
+  - rays that miss entirely
+  - rays originating inside a constituent
+  - rays along axes through multiple constituents
+  - Inside() classification at known points
+  - verification that Voxelize() was called (even if voxel count is 0
+    for small numbers of solids, navigation must still work)
+
+Returns 0 on success, non-zero on failure.
+**/
+
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+#include <cassert>
+#include <vector>
+#include <string>
+
+#include "G4VSolid.hh"
+#include "G4MultiUnion.hh"
+#include "G4ThreeVector.hh"
+#include "geomdefs.hh"
+
+#include "U4SolidMaker.hh"
+#include "ssolid.h"
+#include "OPTICKS_LOG.hh"
+
+static const double kTol = 1.0e-3 ; // mm
+
+struct Ray
+{
+    const char* label ;
+    G4ThreeVector pos ;
+    G4ThreeVector dir ;
+    double        expected_t ;   // kInfinity  → expect a miss
+    double        tolerance ;
+};
+
+struct InsidePoint
+{
+    const char* label ;
+    G4ThreeVector pos ;
+    EInside       expected ;
+};
+
+static const char* EInsideName(EInside e)
+{
+    switch(e)
+    {
+        case kInside:  return "kInside"  ;
+        case kSurface: return "kSurface" ;
+        case kOutside: return "kOutside" ;
+    }
+    return "?" ;
+}
+
+static int check_rays(const char* name, const G4VSolid* solid, const std::vector<Ray>& rays)
+{
+    int fail = 0 ;
+    for(const auto& r : rays)
+    {
+        G4double t = ssolid::Distance(solid, r.pos, r.dir, false);
+
+        bool miss_expected = (r.expected_t >= kInfinity) ;
+        bool miss_got      = (t >= kInfinity) ;
+        bool ok ;
+
+        if(miss_expected)
+            ok = miss_got ;
+        else if(miss_got)
+            ok = false ;
+        else
+            ok = std::fabs(t - r.expected_t) <= r.tolerance ;
+
+        if(!ok)
+        {
+            std::cerr << "FAIL " << name << " ray=" << r.label
+                      << "  expected=" << r.expected_t
+                      << "  got=" << t
+                      << "\n" ;
+            fail++ ;
+        }
+        else
+        {
+            std::cout << "  ok " << r.label
+                      << "  t=" << std::fixed << std::setprecision(4) << t
+                      << "\n" ;
+        }
+    }
+    return fail ;
+}
+
+static int check_inside(const char* name, const G4VSolid* solid, const std::vector<InsidePoint>& pts)
+{
+    int fail = 0 ;
+    for(const auto& ip : pts)
+    {
+        EInside got = solid->Inside(ip.pos) ;
+        bool ok = (got == ip.expected) ;
+        if(!ok)
+        {
+            std::cerr << "FAIL Inside " << name << " pt=" << ip.label
+                      << "  expected=" << EInsideName(ip.expected)
+                      << "  got=" << EInsideName(got)
+                      << "\n" ;
+            fail++ ;
+        }
+        else
+        {
+            std::cout << "  ok Inside " << ip.label
+                      << "  " << EInsideName(got)
+                      << "\n" ;
+        }
+    }
+    return fail ;
+}
+
+// ---------------------------------------------------------------
+// Test 1 – OrbOrbMultiUnion1
+//
+//   3 orbs of radius 50 mm centred at x = -100, 0, +100
+//   (OrbOrbMultiUnion1 means num=1 → range -1..+1)
+//
+//   Geometry (Z cross-section along X axis):
+//
+//      left orb          centre orb         right orb
+//    x:-150..-50        x:-50..+50        x:+50..+150
+//
+//   The orbs touch at x = ±50.
+// ---------------------------------------------------------------
+static int test_OrbOrbMultiUnion()
+{
+    const char* name = "OrbOrbMultiUnion1" ;
+    const G4VSolid* solid = U4SolidMaker::Make(name);
+    if(!solid){ std::cerr << "FAIL: could not create " << name << "\n"; return 1; }
+
+    const double R = 50.0 ;
+
+    std::vector<Ray> rays = {
+        // Outside, shooting +Z into centre orb – should hit at z = -R → t = 200-50 = 150
+        { "hit_centre_orb_+Z",  {0, 0, -200}, {0, 0, 1}, 150.0, kTol },
+
+        // Outside, shooting -Z into centre orb
+        { "hit_centre_orb_-Z",  {0, 0,  200}, {0, 0,-1}, 150.0, kTol },
+
+        // Outside, shooting -X into right orb (centred at x=+100) – surface at x=+150 → t=150
+        { "hit_right_orb_-X",   {300, 0, 0}, {-1, 0, 0}, 150.0, kTol },
+
+        // Outside, shooting +X into left orb (centred at x=-100) – surface at x=-150 → t=150
+        { "hit_left_orb_+X",    {-300, 0, 0}, {1, 0, 0}, 150.0, kTol },
+
+        // Complete miss – ray parallel above all orbs (y > R)
+        { "miss_above",         {0, 0, 200}, {1, 0, 0}, kInfinity, 0 },
+
+        // Ray from inside centre orb toward +Z surface – distance = R = 50
+        { "inside_centre_+Z",   {0, 0, 0}, {0, 0, 1}, R, kTol },
+
+        // Ray from inside right orb toward +X surface – distance = R = 50
+        { "inside_right_+X",    {100, 0, 0}, {1, 0, 0}, R, kTol },
+
+        // Ray along +X from far left – enters left orb at x=-150 → t = 200-150 = 50
+        { "enter_left_along_+X", {-200, 0, 0}, {1, 0, 0}, 50.0, kTol },
+    };
+
+    int fail = check_rays(name, solid, rays) ;
+
+    // Inside() classification checks
+    std::vector<InsidePoint> pts = {
+        { "origin_inside",      {0, 0, 0},       kInside  },
+        { "right_centre",       {100, 0, 0},     kInside  },
+        { "left_centre",        {-100, 0, 0},    kInside  },
+        { "far_outside",        {0, 0, 500},     kOutside },
+        { "surface_centre_+Z",  {0, 0, R},       kSurface },
+        // x=60 is inside the right orb (centred at 100, radius 50, extends 50..150)
+        { "inside_right_orb",   {60, 0, 0},      kInside  },
+    };
+
+    fail += check_inside(name, solid, pts) ;
+    return fail ;
+}
+
+
+// ---------------------------------------------------------------
+// Test 2 – BoxFourBoxContiguous
+//
+//   Centre box is G4Box(45, 45, 45) – a 45mm half-side cube.
+//   Flanking +X/-X boxes: G4Box(10, 11.5, 6.5) at x=±52
+//   Flanking +Y/-Y boxes: G4Box(15, 15, 6.5) at y=±50
+//
+//   Full extent: x = -62..62, y = -65..65, z = -45..45
+// ---------------------------------------------------------------
+static int test_BoxFourBoxContiguous()
+{
+    const char* name = "BoxFourBoxContiguous" ;
+    const G4VSolid* solid = U4SolidMaker::Make(name);
+    if(!solid){ std::cerr << "FAIL: could not create " << name << "\n"; return 1; }
+
+    const double hz = 45.0 ; // centre box z half-extent
+
+    std::vector<Ray> rays = {
+        // Shoot +Z into centre box from above → hit at z = -45 → t = 200-45 = 155
+        { "centre_box_+Z",   {0, 0, -200}, {0, 0, 1}, 200.0 - hz, kTol },
+
+        // Shoot -Z into centre box from below → t = 155
+        { "centre_box_-Z",   {0, 0,  200}, {0, 0,-1}, 200.0 - hz, kTol },
+
+        // Miss – ray parallel far away in Y
+        { "miss_far_Y",      {0, 500, 0}, {0, 0, 1}, kInfinity, 0 },
+
+        // Ray from inside centre box → DistanceToOut(origin, +Z) = 45
+        { "inside_centre_+Z", {0, 0, 0}, {0, 0, 1}, hz, kTol },
+
+        // Ray hitting +X flanking box: box extends x=42..62, z=-6.5..6.5
+        // From (100, 0, 0) going -X → hit at x=62 → t = 38
+        { "hit_px_flank_-X",  {100, 0, 0}, {-1, 0, 0}, 100.0 - 62.0, kTol },
+    };
+
+    int fail = check_rays(name, solid, rays) ;
+
+    std::vector<InsidePoint> pts = {
+        { "origin_inside",   {0, 0, 0},       kInside  },
+        { "far_outside",     {0, 0, 500},     kOutside },
+        { "in_px_flank",     {55, 0, 0},      kInside  },
+    };
+
+    fail += check_inside(name, solid, pts) ;
+    return fail ;
+}
+
+
+// ---------------------------------------------------------------
+// Test 3 – Verify EnsureVoxelizedMultiUnion was invoked
+//
+//   OrbOrbMultiUnion does NOT call Voxelize() itself; it relies
+//   on the safety net in Make().
+//
+//   Note: Geant4's voxelizer may decide to produce 0 voxels for
+//   a small number of solids (its internal optimisation threshold).
+//   So we cannot assert voxel count > 0.  Instead, we confirm the
+//   solid is a G4MultiUnion AND navigation returns correct results
+//   (which means the standard navigation path is functioning).
+// ---------------------------------------------------------------
+static int test_navigation_without_explicit_voxelize()
+{
+    const char* name = "OrbOrbMultiUnion1" ;
+    const G4VSolid* solid = U4SolidMaker::Make(name);
+    if(!solid){ std::cerr << "FAIL: could not create " << name << "\n"; return 1; }
+
+    const G4MultiUnion* mu = dynamic_cast<const G4MultiUnion*>(solid) ;
+    if(!mu){ std::cerr << "FAIL: solid is not G4MultiUnion\n"; return 1; }
+
+    std::cout << "  nSolids=" << mu->GetNumberOfSolids()
+              << " voxels=" << mu->GetVoxels().GetCountOfVoxels()
+              << "\n" ;
+
+    // This solid has no explicit Voxelize() call in OrbOrbMultiUnion —
+    // EnsureVoxelizedMultiUnion in Make() should have called it.
+    // Verify navigation correctness: from origin (inside centre orb),
+    // DistanceToOut in +Z must equal the orb radius (50mm).
+    G4ThreeVector pos(0, 0, 0), dir(0, 0, 1) ;
+    EInside in ;
+    G4double t = ssolid::Distance_(solid, pos, dir, in) ;
+
+    if(in != kInside)
+    {
+        std::cerr << "FAIL: origin should be kInside, got " << EInsideName(in) << "\n" ;
+        return 1 ;
+    }
+    if(std::fabs(t - 50.0) > kTol)
+    {
+        std::cerr << "FAIL: expected t=50, got " << t << "\n" ;
+        return 1 ;
+    }
+    std::cout << "  ok navigation correct: in=" << EInsideName(in) << " t=" << t << "\n" ;
+    return 0 ;
+}
+
+
+int main(int argc, char** argv)
+{
+    OPTICKS_LOG(argc, argv);
+
+    int fail = 0 ;
+
+    std::cout << "--- test_OrbOrbMultiUnion ---\n" ;
+    fail += test_OrbOrbMultiUnion() ;
+
+    std::cout << "--- test_BoxFourBoxContiguous ---\n" ;
+    fail += test_BoxFourBoxContiguous() ;
+
+    std::cout << "--- test_navigation_without_explicit_voxelize ---\n" ;
+    fail += test_navigation_without_explicit_voxelize() ;
+
+    std::cout << "--- " << (fail == 0 ? "ALL PASS" : "FAILURES") << " ---\n" ;
+    return fail ;
+}