Opticks replaces Geant4 optical photon simulation with an equivalent implementation that benefits from state-of-the-art GPU ray tracing from NVIDIA OptiX. All optically relevant aspects of Geant4 context must be translated+copied to GPU:
- geometry : solids, structure, material+surface properties
- generation : Cerenkov + Scintillation (using Gensteps from Geant4)
- propagation : Rayleigh scattering, absorption, reemission, boundary
Achieving+maintaining equivalence is time consuming, BUT:
- transformative performance benefits >1000x Geant4
light produced in certain special scintillator materials, such as JUNO LS (liquid scintillator)
light produced in many materials (eg Water[refractive index 1.333]) when particle travels through it faster than the speed of light in the medium (for water when velocity v/c > 1/1.3333 = 0.75 )
34 template <typename T> struct qctx
35 {
36 curandStateXORWOW* r ;
37
38 cudaTextureObject_t scint_tex ;
..
52 qprop<T>* prop ;
..
64 QCTX_METHOD float4 boundary_lookup( unsigned ix, unsigned iy );
65 QCTX_METHOD float4 boundary_lookup( float nm, unsigned line, unsigned k );
66
67 QCTX_METHOD float scint_wavelength_hd0(curandStateXORWOW& rng);
68 QCTX_METHOD float scint_wavelength_hd10(curandStateXORWOW& rng);
69 QCTX_METHOD float scint_wavelength_hd20(curandStateXORWOW& rng);
70 QCTX_METHOD void scint_dirpol(quad4& p, curandStateXORWOW& rng);
71 QCTX_METHOD void reemit_photon(quad4& p, float scintillationTime, curandStateXORWOW& rng);
72 QCTX_METHOD void scint_photon( quad4& p, GS& g, curandStateXORWOW& rng);
73 QCTX_METHOD void scint_photon( quad4& p, curandStateXORWOW& rng);
74
75 QCTX_METHOD void cerenkov_fabricate_genstep(GS& g, bool energy_range );
..
80 QCTX_METHOD void cerenkov_photon(quad4& p, unsigned id, curandStateXORWOW& rng, const GS& g, int print_id = -1 ) ;
81 QCTX_METHOD void cerenkov_photon(quad4& p, unsigned id, curandStateXORWOW& rng, int print_id = -1 ) ;
82
83 QCTX_METHOD void cerenkov_photon_enprop(quad4& p, unsigned id, curandStateXORWOW& rng, const GS& g, int print_id = -1 ) ;
84 QCTX_METHOD void cerenkov_photon_enprop(quad4& p, unsigned id, curandStateXORWOW& rng, int print_id = -1 ) ;
85
86 QCTX_METHOD void cerenkov_photon_expt( quad4& p, unsigned id, curandStateXORWOW& rng, int print_id = -1 );
87
Opticks GPU equivalent
// optixrap/cu/scintillationstep.h
185 __device__ void
186 generate_scintillation_photon(Photon& p,
ScintillationStep& ss, curandState& rng)
187 {
...
199 p.wavelength =
reemission_lookup(curand_uniform(&rng));
200
// optixrap/cu/reemission_lookup.h
025 static __device__ __inline__
float reemission_lookup(float u)
26 {
27 float ui = u/reemission_domain.z + 0.5f ;
28 return tex2D(reemission_texture, ui, 0.5f );
29 }
200 G4VParticleChange*
201 DsG4Scintillation::PostStepDoIt(
const G4Track& aTrack, const G4Step& aStep)
208 {
...
540 G4PhysicsOrderedFreeVector* ScintillationIntegral = NULL;
543 ScintillationIntegral =
544 (G4PhysicsOrderedFreeVector*)
((*theFastIntegralTable)(materialIndex));
...
579 for(G4int i = 0 ; i < NumPhoton ; i++) {
...
583 G4double sampledEnergy;
586 G4double CIIvalue = G4UniformRand()*
587 ScintillationIntegral->GetMaxValue();
588 sampledEnergy=
589 ScintillationIntegral->GetEnergy(CIIvalue);
096 G4double G4PhysicsOrderedFreeVector::GetEnergy(G4double aValue)
97 {
98 G4double e;
...
104 size_t closestBin = FindValueBinLocation(aValue);
105 e = LinearInterpolationOfEnergy(aValue, closestBin);
107 return e;
108 }
110 size_t G4PhysicsOrderedFreeVector::FindValueBinLocation(G4double aValue)
111 {
112 size_t bin = std::lower_bound(dataVector.begin(), dataVector.end(), aValue)
113 - dataVector.begin() - 1;
114 bin = std::min(bin, numberOfNodes-2);
115 return bin;
116 }
| DsG4Scintillator compared with | chi2 | note |
|---|---|---|
| hd0_cudaFilterModePoint | 13.79 | "artifact" issue in tails, terrible chi2 |
| hd0 | 1.00 | interpolation enabled, fixes chi2 |
| hd20 | 0.88 | effectively x20 bins, improves chi2 |
| hd20_cudaFilterModePoint | 1.29 | XHD "HD20" even OK without interpolation |
GPU texture lookup
float scint_wavelength_hd20(curandStateXORWOW& rng)
{
float u0 = curand_uniform(&rng);
float wl ;
constexpr float y0 = 0.5f/3.f ;
constexpr float y1 = 1.5f/3.f ;
constexpr float y2 = 2.5f/3.f ;
if( u0 < 0.05f )
{
wl = tex2D(scint_tex, u0*20.f , y1 );
}
else if ( u0 > 0.95f )
{
wl = tex2D(scint_tex, (u0 - 0.95f)*20.f , y2 );
}
else
{
wl = tex2D(scint_tex, u0, y0 );
}
return wl ;
}
156 NPY<double>* X4Scintillation::CreateGeant4InterpolatedInverseCDF(
157 const G4PhysicsOrderedFreeVector* ScintillatorIntegral,
158 unsigned num_bins, unsigned hd_factor, ..
161 )
164 double mx = ScintillatorIntegral->GetMaxValue() ;
166 NPY<double>* icdf = NPY<double>::make(3, num_bins, 1 );
170 int nj = icdf->getShape(1);
180 double edge = 1./double(hd_factor) ;
...
202 for(int j=0 ; j < nj ; j++)
203 {
204 double u_all = double(j)/double(nj) ;
205 double u_lhs = double(j)/double(hd_factor*nj) ;
206 double u_rhs = 1. - edge + double(j)/double(hd_factor*nj) ;
207
208 double energy_all = ScintillatorIntegral->GetEnergy( u_all*mx );
209 double energy_lhs = ScintillatorIntegral->GetEnergy( u_lhs*mx );
210 double energy_rhs = ScintillatorIntegral->GetEnergy( u_rhs*mx );
211
212 double wavelength_all = h_Planck*c_light/energy_all/nm ;
213 double wavelength_lhs = h_Planck*c_light/energy_lhs/nm ;
214 double wavelength_rhs = h_Planck*c_light/energy_rhs/nm ;
215
217 icdf->setValue(0, j, 0, 0, wavelength_all );
218 icdf->setValue(1, j, 0, 0, wavelength_lhs );
219 icdf->setValue(2, j, 0, 0, wavelength_rhs );
220 }
221 return icdf ;
222 }
https://bitbucket.org/simoncblyth/opticks/src/master/extg4/X4Scintillation.cc
https://bitbucket.org/simoncblyth/opticks/src/master/qudarap/qctx.h
\ B /
\ . | . / AC ct / n 1 i BetaInverse
\ C | C / cos th = ---- = -------- = ------ = --- = -------------
\ . \ | / . / AB bct b n n sampledRI
. \ bct / .
\ | / BetaInverse
\ | / ct maxCos = -----------------
\ |th/ ---- nMax
\ | / n
\|/
A
Particle travels AB, light travels AC, ACB is right angle
Only get Cerenkov radiation when
cos th <= 1 ,
beta >= beta_min = 1/n BetaInverse <= BetaInverse_max = n
At the small beta threshold AB = AC, beta = beta_min = 1/n eg for n = 1.333, beta_min = 0.75
cos th = 1, th = 0 light is in direction of the particle
For ultra relativistic particle beta = 1, there is a maximum angle
th = arccos( 1/n )
Rejection Sampling
Start from flat energy distrib
sampleRI < BetaInverse :
168 G4VParticleChange*
169 G4Cerenkov_modified::PostStepDoIt(
const G4Track& aTrack, const G4Step& aStep)
...
252 G4double Pmin = Rindex->GetMinLowEdgeEnergy();
253 G4double Pmax = Rindex->GetMaxLowEdgeEnergy();
254 G4double dp = Pmax - Pmin;
...
268 G4double maxCos = BetaInverse / nMax;
270 G4double maxSin2 = (1.0 - maxCos) * (1.0 + maxCos);
...
315 for (G4int i = 0; i < fNumPhotons; i++) {
317
318 G4double rand;
319 G4double sampledEnergy, sampledRI;
320 G4double cosTheta, sin2Theta;
...
324 do {
325 rand = G4UniformRand();
326 sampledEnergy = Pmin + rand * dp;
327 sampledRI = Rindex->Value(sampledEnergy);
334 cosTheta = BetaInverse / sampledRI;
342 sin2Theta = (1.0 - cosTheta)*(1.0 + cosTheta);
343 rand = G4UniformRand();
344
346 } while (rand*maxSin2 > sin2Theta);
1.5/1.8 = .83, 1.5/1.62 = 0.92
Poor chi2 match investigations
random aligned comparison
256M curand_uniform floats -> Geant4
BUT statistical comparison still grotty chi2/ndf
Resorting to double precision rejection sampling
QUESTIONS
574 template <typename T>
575 inline QCTX_METHOD void qctx<T>::cerenkov_photon_expt(
quad4& p, unsigned id, curandStateXORWOW& rng, int print_id )
576 {
577 double BetaInverse = 1.5 ;
578 double Pmin = 1.55 ;
579 double Pmax = 15.5 ;
580 double nMax = 1.793 ;
581 double maxCos = BetaInverse / nMax;
582 double maxSin2 = ( 1. - maxCos )*( 1. + maxCos );
583
584 double u0 ;
585 double u1 ;
586 double energy ;
587 double sampledRI ;
588 double cosTheta ;
589 double sin2Theta ;
590 double u_mxs2_s2 ;
592 unsigned loop = 0u ;
593
594 do {
596 u0 = curand_uniform_double(&rng) ;
598 energy = Pmin + u0*(Pmax - Pmin) ;
600 sampledRI = prop->interpolate( 0u, energy );
602 cosTheta = BetaInverse / sampledRI ;
604 sin2Theta = (1. - cosTheta)*(1. + cosTheta);
606 u1 = curand_uniform_double(&rng) ;
608 u_mxs2_s2 = u1*maxSin2 - sin2Theta ;
610 loop += 1 ;
612 } while ( u_mxs2_s2 > 0. );
590 "uni_acrylic3" Fasteners : cost 65x more than all 45.6k PMTs
uni_acrylic3
LS absorption length (red)
GPU Ray Tracing APIs Converged
NVIDIA OptiX 6->7 : drastically slimmed down
More control/flexibility over everything.
Demands much more developer effort than OptiX 6
IAS < Inst < Solid < Prim < Node
struct CSGFoundry
{
void upload(); // to GPU
...
std::vector<CSGSolid> solid ; // compounds (eg PMT)
std::vector<CSGPrim> prim ;
std::vector<CSGNode> node ; // shapes, operators
std::vector<float4> plan ; // planes
std::vector<qat4> tran ; // CSG transforms
std::vector<qat4> itra ; // inverse CSG transforms
std::vector<qat4> inst ; // instance transforms
// entire geometry in four GPU allocations
CSGPrim* d_prim ;
CSGNode* d_node ;
float4* d_plan ;
qat4* d_itra ;
};
referencing by offset, count
Simple intersect headers, common CPU/GPU types
GAS : Geometry Acceleration Structure
IAS : Instance Acceleration Structure
CSG : Constructive Solid Geometry
1st JUNO Opticks OptiX 7 Ray-trace
Very New CSG "Foundry" CPU/GPU Geometry
Factorize ~300,000 vol -> 10 comp
"progeny digest" characterizes subtree of every volume-node
| ridx | plc | prim | component | note |
|---|---|---|---|---|
| 0 | 1 | 3084 | 3084:sWorld | non-repeated remainder |
| 1 | 25600 | 5 | 5:PMT_3inch_pmt_solid | 4 types of PMT |
| 2 | 12612 | 5 | 5:NNVTMCPPMTsMask | |
| 3 | 5000 | 5 | 5:HamamatsuR12860sMask | |
| 4 | 2400 | 5 | 5:mask_PMT_20inch_vetosMask | |
| 5 | 590 | 1 | 1:sStrutBallhead | 4 parts of same assembly, BUT not grouped as siblings (not parent-child) |
| 6 | 590 | 1 | 1:uni1 | |
| 7 | 590 | 1 | 1:base_steel | |
| 8 | 590 | 1 | 1:uni_acrylic3 | |
| 9 | 504 | 130 | 130:sPanel | repeated parts of TT |
Increasing instancing : reduces memory for geometry -> improved performance
Vary Geom. Compare Render Times
Fast render -> Fast simulation
Same viewpoint, vary GPU geometry
Very large range of times 1:600
Table identifies slow geometry to fix :
Good performance for ONLY PMTs :
| idx | -e | time(s) | relative | enabled geometry description |
|---|---|---|---|---|
| 0 | 9, | 0.0017 | 0.1702 | ONLY: 130:sPanel |
| 1 | 7, | 0.0017 | 0.1714 | ONLY: 1:base_steel |
| 2 | 6, | 0.0019 | 0.1923 | ONLY: 1:uni1 |
| 3 | 5, | 0.0027 | 0.2780 | ONLY: 1:sStrutBallhead |
| 4 | 4, | 0.0032 | 0.3268 | ONLY: 5:mask_PMT_20inch_vetosMask |
| 5 | 1, | 0.0032 | 0.3287 | ONLY: 5:PMT_3inch_pmt_solid |
| 6 | 2, | 0.0055 | 0.5669 | ONLY: 5:NNVTMCPPMTsMask |
| 7 | 3, | 0.0074 | 0.7582 | ONLY: 5:HamamatsuR12860sMask |
| 8 | 1,2,3,4 | 0.0097 | 1.0000 | ONLY PMT |
| 9 | t8,0 | 0.0099 | 1.0179 | EXCL: 1:uni_acrylic3 3084:sWorld |
| 10 | 0, | 0.1171 | 12.0293 | ONLY: 3084:sWorld |
| 11 | t8, | 0.1186 | 12.1769 | EXCL: 1:uni_acrylic3 |
| 12 | t0, | 0.5278 | 54.2066 | EXCL: 3084:sWorld |
| 13 | 8, | 0.5310 | 54.5298 | ONLY: 1:uni_acrylic3 |
| 14 | t3, | 0.6017 | 61.7954 | EXCL: 5:HamamatsuR12860sMask |
| 15 | t2, | 0.6043 | 62.0620 | EXCL: 5:NNVTMCPPMTsMask |
| 16 | t5, | 0.6171 | 63.3787 | EXCL: 1:sStrutBallhead |
| 17 | t6, | 0.6196 | 63.6301 | EXCL: 1:uni1 |
| 18 | t7, | 0.6226 | 63.9458 | EXCL: 1:base_steel |
| 19 | t0 | 0.6240 | 64.0879 | 3084:sWorld |
| 20 | t4, | 0.6243 | 64.1169 | EXCL: 5:mask_PMT_20inch_vetosMask |
| 21 | t9, | 0.6335 | 65.0636 | EXCL: 130:sPanel |
| 22 | t1, | 0.6391 | 65.6384 | EXCL: 5:PMT_3inch_pmt_solid |