feat: add per-process and per-photon G4 timing instrumentation #280
feat: add per-process and per-photon G4 timing instrumentation #280
Conversation
Add optical photon performance profiling to GPURaytrace:
- Per-process step timing (Transportation, OpWLS, OpRayleigh, OpAbsorption) using std::chrono::steady_clock between consecutive SteppingAction calls
- Per-photon lifetime timing with median, percentiles, and histogram
- Per-photon step count statistics (avg, median, p10/p50/p90/p99)
- --skip-gpu flag to measure G4-only photon propagation without GPU

Add benchmark script (examples/photontiming_geant4/photontimingandsteps.sh) that correctly measures GPU vs G4 speedup using three runs:
1. G4 with photons, no GPU (--skip-gpu)
2. G4 baseline with setStackPhotons false
3. Normal GPU run

Measured on apex.gdml (10 MeV electron, ~250k photons, RTX 4090):
- Transportation: 0.77 us/step, 114M steps, 99.8% of total time
- OpWLS: 0.85 us/invocation, 42k invocations, 0.07% of total
- Median photon: 2.1 us (UV, exits world quickly)
- Average photon: 306 us (skewed by 4% WLS-converted tail at 1-10 ms)
- Speedup: ~1,400x (single-threaded G4 vs GPU)
There was a problem hiding this comment.
Cpp-linter Review
Used clang-format v20.1.2
Click here for the full clang-format patch
diff --git a/src/GPURaytrace.cpp b/src/GPURaytrace.cpp
index 64386ca..be1dd60 100644
--- a/src/GPURaytrace.cpp
+++ b/src/GPURaytrace.cpp
@@ -78,3 +78 @@ int main(int argc, char **argv)
- program.add_argument("--skip-gpu")
- .help("skip GPU photon propagation (for measuring G4-only photon time)")
- .flag();
+ program.add_argument("--skip-gpu").help("skip GPU photon propagation (for measuring G4-only photon time)").flag();
diff --git a/src/GPURaytrace.h b/src/GPURaytrace.h
index b61f0d6..ccad899 100644
--- a/src/GPURaytrace.h
+++ b/src/GPURaytrace.h
@@ -483,4 +483,20 @@ struct SteppingAction : G4UserSteppingAction
- if (pname == "Transportation") { fTimeTransport += dt; fCountTransport++; }
- else if (pname == "OpWLS") { fTimeOpWLS += dt; fCountOpWLS++; }
- else if (pname == "OpRayleigh"){ fTimeOpRayleigh += dt; fCountOpRayleigh++; }
- else if (pname == "OpAbsorption"){ fTimeOpAbsorption += dt; fCountOpAbsorption++; }
+ if (pname == "Transportation")
+ {
+ fTimeTransport += dt;
+ fCountTransport++;
+ }
+ else if (pname == "OpWLS")
+ {
+ fTimeOpWLS += dt;
+ fCountOpWLS++;
+ }
+ else if (pname == "OpRayleigh")
+ {
+ fTimeOpRayleigh += dt;
+ fCountOpRayleigh++;
+ }
+ else if (pname == "OpAbsorption")
+ {
+ fTimeOpAbsorption += dt;
+ fCountOpAbsorption++;
+ }
@@ -507 +523,2 @@ struct SteppingAction : G4UserSteppingAction
- if (fSkipGenstep) return; // skip genstep collection for timing-only runs
+ if (fSkipGenstep)
+ return; // skip genstep collection for timing-only runs
@@ -637 +654,3 @@ struct TrackingAction : G4UserTrackingAction
- while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {}
+ while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt))
+ {
+ }
@@ -639 +658,3 @@ struct TrackingAction : G4UserTrackingAction
- while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) {}
+ while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt))
+ {
+ }
@@ -641,6 +662,12 @@ struct TrackingAction : G4UserTrackingAction
- if (dt < 1000) fTimeBucket0++;
- else if (dt < 10000) fTimeBucket1++;
- else if (dt < 100000) fTimeBucket2++;
- else if (dt < 1000000) fTimeBucket3++;
- else if (dt < 10000000) fTimeBucket4++;
- else fTimeBucket5++;
+ if (dt < 1000)
+ fTimeBucket0++;
+ else if (dt < 10000)
+ fTimeBucket1++;
+ else if (dt < 100000)
+ fTimeBucket2++;
+ else if (dt < 1000000)
+ fTimeBucket3++;
+ else if (dt < 10000000)
+ fTimeBucket4++;
+ else
+ fTimeBucket5++;
@@ -649 +676,5 @@ struct TrackingAction : G4UserTrackingAction
- { std::lock_guard<std::mutex> lock(fTimesMutex); fAllTimes.push_back(dt); fAllSteps.push_back(nsteps); }
+ {
+ std::lock_guard<std::mutex> lock(fTimesMutex);
+ fAllTimes.push_back(dt);
+ fAllSteps.push_back(nsteps);
+ }
@@ -656 +687,2 @@ struct TrackingAction : G4UserTrackingAction
- if (n == 0) return;
+ if (n == 0)
+ return;
@@ -691 +723,2 @@ struct TrackingAction : G4UserTrackingAction
- for (int s : fAllSteps) step_sum += s;
+ for (int s : fAllSteps)
+ step_sum += s;
@@ -699,4 +732,2 @@ struct TrackingAction : G4UserTrackingAction
- << "p10=" << fAllSteps[ssz / 10]
- << ", p50=" << fAllSteps[ssz / 2]
- << ", p90=" << fAllSteps[ssz * 9 / 10]
- << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl;
+ << "p10=" << fAllSteps[ssz / 10] << ", p50=" << fAllSteps[ssz / 2]
+ << ", p90=" << fAllSteps[ssz * 9 / 10] << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl;
@@ -718,6 +749,4 @@ inline void RunAction::PrintTimingReport()
- std::cout << "Geant4: StepTime " << std::setw(15) << name
- << ": count=" << std::setw(10) << count
- << " avg=" << std::fixed << std::setprecision(2) << std::setw(8)
- << total_ns / 1000.0 / count << " us"
- << " total=" << std::setprecision(3) << std::setw(8)
- << total_ns / 1e9 << " s" << std::endl;
+ std::cout << "Geant4: StepTime " << std::setw(15) << name << ": count=" << std::setw(10) << count
+ << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) << total_ns / 1000.0 / count
+ << " us"
+ << " total=" << std::setprecision(3) << std::setw(8) << total_ns / 1e9 << " s" << std::endl;
Have any feedback or feature suggestions? Share it here.
| program.add_argument("--skip-gpu") | ||
| .help("skip GPU photon propagation (for measuring G4-only photon time)") | ||
| .flag(); |
There was a problem hiding this comment.
clang-format suggestion
| program.add_argument("--skip-gpu") | |
| .help("skip GPU photon propagation (for measuring G4-only photon time)") | |
| .flag(); | |
| program.add_argument("--skip-gpu").help("skip GPU photon propagation (for measuring G4-only photon time)").flag(); |
| if (pname == "Transportation") { fTimeTransport += dt; fCountTransport++; } | ||
| else if (pname == "OpWLS") { fTimeOpWLS += dt; fCountOpWLS++; } | ||
| else if (pname == "OpRayleigh"){ fTimeOpRayleigh += dt; fCountOpRayleigh++; } | ||
| else if (pname == "OpAbsorption"){ fTimeOpAbsorption += dt; fCountOpAbsorption++; } |
There was a problem hiding this comment.
clang-format suggestion
| if (pname == "Transportation") { fTimeTransport += dt; fCountTransport++; } | |
| else if (pname == "OpWLS") { fTimeOpWLS += dt; fCountOpWLS++; } | |
| else if (pname == "OpRayleigh"){ fTimeOpRayleigh += dt; fCountOpRayleigh++; } | |
| else if (pname == "OpAbsorption"){ fTimeOpAbsorption += dt; fCountOpAbsorption++; } | |
| if (pname == "Transportation") | |
| { | |
| fTimeTransport += dt; | |
| fCountTransport++; | |
| } | |
| else if (pname == "OpWLS") | |
| { | |
| fTimeOpWLS += dt; | |
| fCountOpWLS++; | |
| } | |
| else if (pname == "OpRayleigh") | |
| { | |
| fTimeOpRayleigh += dt; | |
| fCountOpRayleigh++; | |
| } | |
| else if (pname == "OpAbsorption") | |
| { | |
| fTimeOpAbsorption += dt; | |
| fCountOpAbsorption++; | |
| } |
| } | ||
| } | ||
|
|
||
| if (fSkipGenstep) return; // skip genstep collection for timing-only runs |
There was a problem hiding this comment.
clang-format suggestion
| if (fSkipGenstep) return; // skip genstep collection for timing-only runs | |
| if (fSkipGenstep) | |
| return; // skip genstep collection for timing-only runs |
| fPhotonCount++; | ||
|
|
||
| long long cur = fMinPhotonTime.load(); | ||
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {} |
There was a problem hiding this comment.
clang-format suggestion
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {} | |
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) | |
| { | |
| } |
| long long cur = fMinPhotonTime.load(); | ||
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {} | ||
| cur = fMaxPhotonTime.load(); | ||
| while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) {} |
There was a problem hiding this comment.
clang-format suggestion
| while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) {} | |
| while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) | |
| { | |
| } |
| else fTimeBucket5++; | ||
|
|
||
| int nsteps = track->GetCurrentStepNumber(); | ||
| { std::lock_guard<std::mutex> lock(fTimesMutex); fAllTimes.push_back(dt); fAllSteps.push_back(nsteps); } |
There was a problem hiding this comment.
clang-format suggestion
| { std::lock_guard<std::mutex> lock(fTimesMutex); fAllTimes.push_back(dt); fAllSteps.push_back(nsteps); } | |
| { | |
| std::lock_guard<std::mutex> lock(fTimesMutex); | |
| fAllTimes.push_back(dt); | |
| fAllSteps.push_back(nsteps); | |
| } |
| void PrintPhotonTiming() | ||
| { | ||
| int n = fPhotonCount.load(); | ||
| if (n == 0) return; |
There was a problem hiding this comment.
clang-format suggestion
| if (n == 0) return; | |
| if (n == 0) | |
| return; |
| if (ssz > 0) | ||
| { | ||
| long long step_sum = 0; | ||
| for (int s : fAllSteps) step_sum += s; |
There was a problem hiding this comment.
clang-format suggestion
| for (int s : fAllSteps) step_sum += s; | |
| for (int s : fAllSteps) | |
| step_sum += s; |
| << "p10=" << fAllSteps[ssz / 10] | ||
| << ", p50=" << fAllSteps[ssz / 2] | ||
| << ", p90=" << fAllSteps[ssz * 9 / 10] | ||
| << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl; |
There was a problem hiding this comment.
clang-format suggestion
| << "p10=" << fAllSteps[ssz / 10] | |
| << ", p50=" << fAllSteps[ssz / 2] | |
| << ", p90=" << fAllSteps[ssz * 9 / 10] | |
| << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl; | |
| << "p10=" << fAllSteps[ssz / 10] << ", p50=" << fAllSteps[ssz / 2] | |
| << ", p90=" << fAllSteps[ssz * 9 / 10] << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl; |
| std::cout << "Geant4: StepTime " << std::setw(15) << name | ||
| << ": count=" << std::setw(10) << count | ||
| << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) | ||
| << total_ns / 1000.0 / count << " us" | ||
| << " total=" << std::setprecision(3) << std::setw(8) | ||
| << total_ns / 1e9 << " s" << std::endl; |
There was a problem hiding this comment.
clang-format suggestion
| std::cout << "Geant4: StepTime " << std::setw(15) << name | |
| << ": count=" << std::setw(10) << count | |
| << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) | |
| << total_ns / 1000.0 / count << " us" | |
| << " total=" << std::setprecision(3) << std::setw(8) | |
| << total_ns / 1e9 << " s" << std::endl; | |
| std::cout << "Geant4: StepTime " << std::setw(15) << name << ": count=" << std::setw(10) << count | |
| << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) << total_ns / 1000.0 / count | |
| << " us" | |
| << " total=" << std::setprecision(3) << std::setw(8) << total_ns / 1e9 << " s" << std::endl; |
The genstep collection (Cerenkov/Scintillation CollectGenstep calls and associated mutex locks) adds overhead to the G4-only timing run. Guard it with the fSkipGenstep flag so that Run 1 measures pure G4 photon propagation without GPU-related bookkeeping.
b799d72 to
9bfd6fa
Compare
Cpp-Linter Report
|
There was a problem hiding this comment.
Cpp-linter Review
Used clang-format v20.1.2
Click here for the full clang-format patch
diff --git a/src/GPURaytrace.cpp b/src/GPURaytrace.cpp
index 64386ca..be1dd60 100644
--- a/src/GPURaytrace.cpp
+++ b/src/GPURaytrace.cpp
@@ -78,3 +78 @@ int main(int argc, char **argv)
- program.add_argument("--skip-gpu")
- .help("skip GPU photon propagation (for measuring G4-only photon time)")
- .flag();
+ program.add_argument("--skip-gpu").help("skip GPU photon propagation (for measuring G4-only photon time)").flag();
diff --git a/src/GPURaytrace.h b/src/GPURaytrace.h
index b61f0d6..ccad899 100644
--- a/src/GPURaytrace.h
+++ b/src/GPURaytrace.h
@@ -483,4 +483,20 @@ struct SteppingAction : G4UserSteppingAction
- if (pname == "Transportation") { fTimeTransport += dt; fCountTransport++; }
- else if (pname == "OpWLS") { fTimeOpWLS += dt; fCountOpWLS++; }
- else if (pname == "OpRayleigh"){ fTimeOpRayleigh += dt; fCountOpRayleigh++; }
- else if (pname == "OpAbsorption"){ fTimeOpAbsorption += dt; fCountOpAbsorption++; }
+ if (pname == "Transportation")
+ {
+ fTimeTransport += dt;
+ fCountTransport++;
+ }
+ else if (pname == "OpWLS")
+ {
+ fTimeOpWLS += dt;
+ fCountOpWLS++;
+ }
+ else if (pname == "OpRayleigh")
+ {
+ fTimeOpRayleigh += dt;
+ fCountOpRayleigh++;
+ }
+ else if (pname == "OpAbsorption")
+ {
+ fTimeOpAbsorption += dt;
+ fCountOpAbsorption++;
+ }
@@ -507 +523,2 @@ struct SteppingAction : G4UserSteppingAction
- if (fSkipGenstep) return; // skip genstep collection for timing-only runs
+ if (fSkipGenstep)
+ return; // skip genstep collection for timing-only runs
@@ -637 +654,3 @@ struct TrackingAction : G4UserTrackingAction
- while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {}
+ while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt))
+ {
+ }
@@ -639 +658,3 @@ struct TrackingAction : G4UserTrackingAction
- while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) {}
+ while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt))
+ {
+ }
@@ -641,6 +662,12 @@ struct TrackingAction : G4UserTrackingAction
- if (dt < 1000) fTimeBucket0++;
- else if (dt < 10000) fTimeBucket1++;
- else if (dt < 100000) fTimeBucket2++;
- else if (dt < 1000000) fTimeBucket3++;
- else if (dt < 10000000) fTimeBucket4++;
- else fTimeBucket5++;
+ if (dt < 1000)
+ fTimeBucket0++;
+ else if (dt < 10000)
+ fTimeBucket1++;
+ else if (dt < 100000)
+ fTimeBucket2++;
+ else if (dt < 1000000)
+ fTimeBucket3++;
+ else if (dt < 10000000)
+ fTimeBucket4++;
+ else
+ fTimeBucket5++;
@@ -649 +676,5 @@ struct TrackingAction : G4UserTrackingAction
- { std::lock_guard<std::mutex> lock(fTimesMutex); fAllTimes.push_back(dt); fAllSteps.push_back(nsteps); }
+ {
+ std::lock_guard<std::mutex> lock(fTimesMutex);
+ fAllTimes.push_back(dt);
+ fAllSteps.push_back(nsteps);
+ }
@@ -656 +687,2 @@ struct TrackingAction : G4UserTrackingAction
- if (n == 0) return;
+ if (n == 0)
+ return;
@@ -691 +723,2 @@ struct TrackingAction : G4UserTrackingAction
- for (int s : fAllSteps) step_sum += s;
+ for (int s : fAllSteps)
+ step_sum += s;
@@ -699,4 +732,2 @@ struct TrackingAction : G4UserTrackingAction
- << "p10=" << fAllSteps[ssz / 10]
- << ", p50=" << fAllSteps[ssz / 2]
- << ", p90=" << fAllSteps[ssz * 9 / 10]
- << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl;
+ << "p10=" << fAllSteps[ssz / 10] << ", p50=" << fAllSteps[ssz / 2]
+ << ", p90=" << fAllSteps[ssz * 9 / 10] << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl;
@@ -718,6 +749,4 @@ inline void RunAction::PrintTimingReport()
- std::cout << "Geant4: StepTime " << std::setw(15) << name
- << ": count=" << std::setw(10) << count
- << " avg=" << std::fixed << std::setprecision(2) << std::setw(8)
- << total_ns / 1000.0 / count << " us"
- << " total=" << std::setprecision(3) << std::setw(8)
- << total_ns / 1e9 << " s" << std::endl;
+ std::cout << "Geant4: StepTime " << std::setw(15) << name << ": count=" << std::setw(10) << count
+ << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) << total_ns / 1000.0 / count
+ << " us"
+ << " total=" << std::setprecision(3) << std::setw(8) << total_ns / 1e9 << " s" << std::endl;
Have any feedback or feature suggestions? Share it here.
| program.add_argument("--skip-gpu") | ||
| .help("skip GPU photon propagation (for measuring G4-only photon time)") | ||
| .flag(); |
There was a problem hiding this comment.
clang-format suggestion
| program.add_argument("--skip-gpu") | |
| .help("skip GPU photon propagation (for measuring G4-only photon time)") | |
| .flag(); | |
| program.add_argument("--skip-gpu").help("skip GPU photon propagation (for measuring G4-only photon time)").flag(); |
| if (pname == "Transportation") { fTimeTransport += dt; fCountTransport++; } | ||
| else if (pname == "OpWLS") { fTimeOpWLS += dt; fCountOpWLS++; } | ||
| else if (pname == "OpRayleigh"){ fTimeOpRayleigh += dt; fCountOpRayleigh++; } | ||
| else if (pname == "OpAbsorption"){ fTimeOpAbsorption += dt; fCountOpAbsorption++; } |
There was a problem hiding this comment.
clang-format suggestion
| if (pname == "Transportation") { fTimeTransport += dt; fCountTransport++; } | |
| else if (pname == "OpWLS") { fTimeOpWLS += dt; fCountOpWLS++; } | |
| else if (pname == "OpRayleigh"){ fTimeOpRayleigh += dt; fCountOpRayleigh++; } | |
| else if (pname == "OpAbsorption"){ fTimeOpAbsorption += dt; fCountOpAbsorption++; } | |
| if (pname == "Transportation") | |
| { | |
| fTimeTransport += dt; | |
| fCountTransport++; | |
| } | |
| else if (pname == "OpWLS") | |
| { | |
| fTimeOpWLS += dt; | |
| fCountOpWLS++; | |
| } | |
| else if (pname == "OpRayleigh") | |
| { | |
| fTimeOpRayleigh += dt; | |
| fCountOpRayleigh++; | |
| } | |
| else if (pname == "OpAbsorption") | |
| { | |
| fTimeOpAbsorption += dt; | |
| fCountOpAbsorption++; | |
| } |
| } | ||
| } | ||
|
|
||
| if (fSkipGenstep) return; // skip genstep collection for timing-only runs |
There was a problem hiding this comment.
clang-format suggestion
| if (fSkipGenstep) return; // skip genstep collection for timing-only runs | |
| if (fSkipGenstep) | |
| return; // skip genstep collection for timing-only runs |
| fPhotonCount++; | ||
|
|
||
| long long cur = fMinPhotonTime.load(); | ||
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {} |
There was a problem hiding this comment.
clang-format suggestion
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {} | |
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) | |
| { | |
| } |
| long long cur = fMinPhotonTime.load(); | ||
| while (dt < cur && !fMinPhotonTime.compare_exchange_weak(cur, dt)) {} | ||
| cur = fMaxPhotonTime.load(); | ||
| while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) {} |
There was a problem hiding this comment.
clang-format suggestion
| while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) {} | |
| while (dt > cur && !fMaxPhotonTime.compare_exchange_weak(cur, dt)) | |
| { | |
| } |
| else fTimeBucket5++; | ||
|
|
||
| int nsteps = track->GetCurrentStepNumber(); | ||
| { std::lock_guard<std::mutex> lock(fTimesMutex); fAllTimes.push_back(dt); fAllSteps.push_back(nsteps); } |
There was a problem hiding this comment.
clang-format suggestion
| { std::lock_guard<std::mutex> lock(fTimesMutex); fAllTimes.push_back(dt); fAllSteps.push_back(nsteps); } | |
| { | |
| std::lock_guard<std::mutex> lock(fTimesMutex); | |
| fAllTimes.push_back(dt); | |
| fAllSteps.push_back(nsteps); | |
| } |
| void PrintPhotonTiming() | ||
| { | ||
| int n = fPhotonCount.load(); | ||
| if (n == 0) return; |
There was a problem hiding this comment.
clang-format suggestion
| if (n == 0) return; | |
| if (n == 0) | |
| return; |
| if (ssz > 0) | ||
| { | ||
| long long step_sum = 0; | ||
| for (int s : fAllSteps) step_sum += s; |
There was a problem hiding this comment.
clang-format suggestion
| for (int s : fAllSteps) step_sum += s; | |
| for (int s : fAllSteps) | |
| step_sum += s; |
| << "p10=" << fAllSteps[ssz / 10] | ||
| << ", p50=" << fAllSteps[ssz / 2] | ||
| << ", p90=" << fAllSteps[ssz * 9 / 10] | ||
| << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl; |
There was a problem hiding this comment.
clang-format suggestion
| << "p10=" << fAllSteps[ssz / 10] | |
| << ", p50=" << fAllSteps[ssz / 2] | |
| << ", p90=" << fAllSteps[ssz * 9 / 10] | |
| << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl; | |
| << "p10=" << fAllSteps[ssz / 10] << ", p50=" << fAllSteps[ssz / 2] | |
| << ", p90=" << fAllSteps[ssz * 9 / 10] << ", p99=" << fAllSteps[ssz * 99 / 100] << std::endl; |
| std::cout << "Geant4: StepTime " << std::setw(15) << name | ||
| << ": count=" << std::setw(10) << count | ||
| << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) | ||
| << total_ns / 1000.0 / count << " us" | ||
| << " total=" << std::setprecision(3) << std::setw(8) | ||
| << total_ns / 1e9 << " s" << std::endl; |
There was a problem hiding this comment.
clang-format suggestion
| std::cout << "Geant4: StepTime " << std::setw(15) << name | |
| << ": count=" << std::setw(10) << count | |
| << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) | |
| << total_ns / 1000.0 / count << " us" | |
| << " total=" << std::setprecision(3) << std::setw(8) | |
| << total_ns / 1e9 << " s" << std::endl; | |
| std::cout << "Geant4: StepTime " << std::setw(15) << name << ": count=" << std::setw(10) << count | |
| << " avg=" << std::fixed << std::setprecision(2) << std::setw(8) << total_ns / 1000.0 / count | |
| << " us" | |
| << " total=" << std::setprecision(3) << std::setw(8) << total_ns / 1e9 << " s" << std::endl; |
Add optical photon performance profiling to a new example based on GPURaytrace:
Add benchmark script (examples/photontiming_geant4/photontimingandsteps.sh) that correctly measures GPU vs G4 speedup using three runs:
Measured on apex.gdml (10 MeV electron, ~250k photons, RTX 4090):
Shall be merged after wavelength shifting is