PROGRESS : OptiXTest -> CSG + CSGOptiX, added CSG_GGeo
NEXT STEPS:
LONGTERM POSSIBILITY : Populate CSGFoundry model direct from Geant4 geometry ? [Disruptive]
IAS < Inst < Solid < Prim < Node
struct CSGFoundry { void upload(); // to GPU ... std::vector<CSGSolid> solid ; // compounds (eg PMT) std::vector<CSGPrim> prim ; std::vector<CSGNode> node ; // shapes, operators std::vector<float4> plan ; // planes std::vector<qat4> tran ; // CSG transforms std::vector<qat4> itra ; // inverse CSG transforms std::vector<qat4> inst ; // instance transforms // entire geometry in four GPU allocations CSGPrim* d_prim ; CSGNode* d_node ; float4* d_plan ; qat4* d_itra ; };
referencing by offset, count
Simple intersect headers, common CPU/GPU types
GAS : Geometry Acceleration Structure
IAS : Instance Acceleration Structure
CSG : Constructive Solid Geometry
geo_OptiX6Test.cu
24 rtBuffer<CSGPrim> prim_buffer; // geometry level context 25 28 rtBuffer<CSGNode> node_buffer; // global context 29 rtBuffer<qat4> itra_buffer; 30 rtBuffer<float4> plan_buffer; .. 40 RT_PROGRAM void intersect(int primIdx) 41 { 42 const CSGPrim* prim = &prim_buffer[primIdx] ; 43 int nodeOffset = prim->nodeOffset() ; 44 int numNode = prim->numNode() ; 45 const CSGNode* node = &node_buffer[nodeOffset] ; 46 const float4* plan = &plan_buffer[0] ; 47 const qat4* itra = &itra_buffer[0] ; 48 49 float4 isect ; 50 if(intersect_prim(isect, numNode, node, plan, itra, ray.tmin , ray.origin, ray.direction )) 51 { 52 if(rtPotentialIntersection(isect.w)) 53 { 55 shading_normal = make_float3( isect ); 57 rtReportIntersection(0); 58 } 59 } 60 }
OptiX7Test.cu
150 extern "C" __global__ void __intersection__is()
151 {
152 HitGroupData* hg = (HitGroupData*)optixGetSbtDataPointer();
153 int numNode = hg->numNode ;
154 int nodeOffset = hg->nodeOffset ;
155
156 const CSGNode* node = params.node + nodeOffset ;
157 const float4* plan = params.plan ;
158 const qat4* itra = params.itra ;
159
160 const float t_min = optixGetRayTmin() ;
161 const float3 ray_origin = optixGetObjectRayOrigin();
162 const float3 ray_direction = optixGetObjectRayDirection();
163
164 float4 isect ;
165 if(intersect_prim(isect, numNode, node, plan, itra,
t_min , ray_origin, ray_direction ))
166 {
...
175 optixReportIntersection( isect.w, hitKind, a0, a1, a2, a3 );
176 }
177 }
Minimize code split : 7, pre-7, CPU testing : same intersect_prim
Converter : GGeo/GParts -> CSG
struct CSG_GGeo_Convert { CSGFoundry* foundry ; const GGeo* ggeo ; const Opticks* ok ; .. CSG_GGeo_Convert(CSGFoundry* foundry, const GGeo* ggeo ) ; void init(); void convert(int repeatIdx=-1, int primIdx=-1, int partIdxRel=-1 ); void convert_(); CSGSolid* convert_(unsigned repeatIdx ); void addInstances(unsigned repeatIdx ); CSGPrim* convert_(const GParts* comp, unsigned primIdx ); CSGNode* convert_(const GParts* comp, unsigned primIdx, unsigned partIdxRel ); // below called non-standardly when corresponding envvars defined void addOnePrimSolid(); void addOnePrimSolid(unsigned solidIdx); void addOneNodeSolid(); void addOneNodeSolid(unsigned solidIdx); void addOneNodeSolid(unsigned solidIdx, unsigned primIdx, unsigned primIdxRel); void addDeepCopySolid(); void kludgeScalePrimBBox(); };
.. 11 int main(int argc, char** argv) 12 { 13 OPTICKS_LOG(argc, argv); 14 15 Opticks ok(argc, argv); 16 ok.configure(); 17 18 GGeo* ggeo = GGeo::Load(&ok); 19 20 CSGFoundry foundry ; 21 CSG_GGeo_Convert conv(&foundry, ggeo) ; 22 conv.convert(); ... 39 foundry.write(cfbase, rel ); 40 41 CSGFoundry* fd = CSGFoundry::Load(cfbase, rel); 42 assert( 0 == CSGFoundry::Compare(&foundry, fd ) ); 43 44 return 0 ; 45 }
1st JUNO Opticks OptiX 7 Ray-trace
Very New CSG "Foundry" CPU/GPU Geometry
Factorize ~300,000 vol -> 10 comp
"progeny digest" characterizes subtree of every volume-node
ridx | plc | prim | component | note |
---|---|---|---|---|
0 | 1 | 3084 | 3084:sWorld | non-repeated remainder |
1 | 25600 | 5 | 5:PMT_3inch_pmt_solid | 4 types of PMT |
2 | 12612 | 5 | 5:NNVTMCPPMTsMask | |
3 | 5000 | 5 | 5:HamamatsuR12860sMask | |
4 | 2400 | 5 | 5:mask_PMT_20inch_vetosMask | |
5 | 590 | 1 | 1:sStrutBallhead | 4 parts of same assembly, BUT not grouped as siblings (not parent-child) |
6 | 590 | 1 | 1:uni1 | |
7 | 590 | 1 | 1:base_steel | |
8 | 590 | 1 | 1:uni_acrylic3 | |
9 | 504 | 130 | 130:sPanel | repeated parts of TT |
Increasing instancing : reduces memory for geometry -> improved performance
Vary Geom. Compare Render Times
Fast render -> Fast simulation
Same viewpoint, vary GPU geometry
Very large range of times 1:600
Table identifies slow geometry to fix :
Good performance for ONLY PMTs :
idx | -e | time(s) | relative | enabled geometry description |
---|---|---|---|---|
0 | 9, | 0.0017 | 0.1702 | ONLY: 130:sPanel |
1 | 7, | 0.0017 | 0.1714 | ONLY: 1:base_steel |
2 | 6, | 0.0019 | 0.1923 | ONLY: 1:uni1 |
3 | 5, | 0.0027 | 0.2780 | ONLY: 1:sStrutBallhead |
4 | 4, | 0.0032 | 0.3268 | ONLY: 5:mask_PMT_20inch_vetosMask |
5 | 1, | 0.0032 | 0.3287 | ONLY: 5:PMT_3inch_pmt_solid |
6 | 2, | 0.0055 | 0.5669 | ONLY: 5:NNVTMCPPMTsMask |
7 | 3, | 0.0074 | 0.7582 | ONLY: 5:HamamatsuR12860sMask |
8 | 1,2,3,4 | 0.0097 | 1.0000 | ONLY PMT |
9 | t8,0 | 0.0099 | 1.0179 | EXCL: 1:uni_acrylic3 3084:sWorld |
10 | 0, | 0.1171 | 12.0293 | ONLY: 3084:sWorld |
11 | t8, | 0.1186 | 12.1769 | EXCL: 1:uni_acrylic3 |
12 | t0, | 0.5278 | 54.2066 | EXCL: 3084:sWorld |
13 | 8, | 0.5310 | 54.5298 | ONLY: 1:uni_acrylic3 |
14 | t3, | 0.6017 | 61.7954 | EXCL: 5:HamamatsuR12860sMask |
15 | t2, | 0.6043 | 62.0620 | EXCL: 5:NNVTMCPPMTsMask |
16 | t5, | 0.6171 | 63.3787 | EXCL: 1:sStrutBallhead |
17 | t6, | 0.6196 | 63.6301 | EXCL: 1:uni1 |
18 | t7, | 0.6226 | 63.9458 | EXCL: 1:base_steel |
19 | t0 | 0.6240 | 64.0879 | 3084:sWorld |
20 | t4, | 0.6243 | 64.1169 | EXCL: 5:mask_PMT_20inch_vetosMask |
21 | t9, | 0.6335 | 65.0636 | EXCL: 130:sPanel |
22 | t1, | 0.6391 | 65.6384 | EXCL: 5:PMT_3inch_pmt_solid |
/env/presentation/cxr/cxr_overview/cxr_i0_1,2,3,4_-1.jpg 1280px_720px
"Flat" look is a bug
Only the last Prim was being rendered for each GAS after the 1st
CAUSE: using globalPrimIdx for SBTIndexOffset : needs to be GAS local
/env/presentation/cxr/cxr_overview/cxr_i0_0,_-1.jpg 1280px_720px
3084 "remainder" non-instanced volumes : that is far too many
Looks to be obvious repetitions missed by the auto-instancer
/CSG_GGeo/cvd1/70000/cxr_overview/cam_0_tmin_0.4/cxr_overview_emm_t8,_moi_-1.jpg 1280px_720px
Flipping between this render and the next shows the effect of the missing Prim bug
/env/presentation/cxr/cxr_overview/cxr_i0_t8,_-1.jpg 1280px_720px
Flipping between this render and the previous shows the effect of the missing Prim bug
Inside View with only global remainder geometry
Same viewpoint in OptiX 5,6 and 7
/CSG_GGeo/cvd0/50001/cxr_view/cam_0_1,/cxr_view_sWaterTube.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_view/cam_0_1,/cxr_view_sWaterTube.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_view/cam_0_1,/cxr_view_sWaterTube.jpg 640px_360px 640px_360px
These renders use ONE_PRIM_SOLID envvar with the Converter to add CSGFoundry solids containing a single Prim only.
Example solid names: r1p0 r1p1 r1p2 r1p3 r1p4
Then can plot them all using:
./cxr_solid.sh r1p
The argument selects 5 single prim solids and puts them into an IAS with Y-translations.
/CSG_GGeo/cvd0/50001/cxr_view/cam_0_2,/cxr_view_sWaterTube.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_view/cam_0_2,/cxr_view_sWaterTube.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_view/cam_0_2,/cxr_view_sWaterTube.jpg 640px_360px 640px_360px
OptiX 5,6,7
Last Prim SBT bug apparent with 7
/CSG_GGeo/cvd0/50001/cxr_solid/cam_1/cxr_solid_r2p.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_solid/cam_1/cxr_solid_r2p.jpg 640px_360px 640px_360px
Five of the debug single Prim solids selected with:
./cxr_solid.sh r2p
/CSG_GGeo/cvd0/50001/cxr_view/cam_0_9,/cxr_view_sWaterTube.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_view/cam_0_9,/cxr_view_sWaterTube.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_view/cam_0_9,/cxr_view_sWaterTube.jpg 640px_360px 640px_360px
ridx | plc | prim | component | note |
---|---|---|---|---|
9 | 504 | 130 | 130:sPanel | repeated parts of TT |
Only last of 130 Prim appears (OptiX 7), again due to globalPrimIdx bug
/CSG_GGeo/cvd0/50001/cxr_view/cam_0_8,/cxr_view_sWaterTube.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_view/cam_0_8,/cxr_view_sWaterTube.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_view/cam_0_8,/cxr_view_sWaterTube.jpg 640px_360px 640px_360px
Blank render for solid 8 in OptiX 5,6 and 7
Solid 8 "1:uni_acrylic3" is the "plunger" cup that holds the acrylic 35m diameter sphere
Currently the shape does a CSG subtraction with the huge sphere
ridx | plc | prim | component | note |
---|---|---|---|---|
8 | 590 | 1 | 1:uni_acrylic3 | |
590 of these all around the 35m diameter sphere
Has complicated CSG insides too, subtracting the 8 column "greek temple"
/CSG_GGeo/cvd0/50001/cxr_solid/cam_1/cxr_solid_r0@_.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_solid/cam_1/cxr_solid_r0@_.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_solid/cam_1/cxr_solid_r0@_.jpg 640px_360px 640px_360px
Argument r0@ matches only solid named "r0"
EYE=0,-1,0 TMIN=1 CAM=1 ./cxr_solid.sh r0@
EYE and TMIN in units of selected solid extent, CAM:0/1 perspective/orthographic
/CSG_GGeo/cvd0/50001/cxr_solid/cam_1/cxr_solid_r1@_.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_solid/cam_1/cxr_solid_r1@_.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_solid/cam_1/cxr_solid_r1@_.jpg 640px_360px 640px_360px
Argument r1@ matches only solid named "r1"
EYE=0,-1,0 TMIN=1 CAM=1 ./cxr_solid.sh r1@
/CSG_GGeo/cvd0/50001/cxr_solid/cam_1/cxr_solid_r2@_.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_solid/cam_1/cxr_solid_r2@_.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_solid/cam_1/cxr_solid_r2@_.jpg 640px_360px 640px_360px
EYE=0,-1,0 TMIN=1 CAM=1 ./cxr_solid.sh r2@
/CSG_GGeo/cvd0/50001/cxr_solid/cam_1/cxr_solid_r3@_.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_solid/cam_1/cxr_solid_r3@_.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_solid/cam_1/cxr_solid_r3@_.jpg 640px_360px 640px_360px
/CSG_GGeo/cvd0/50001/cxr_solid/cam_1/cxr_solid_r4@_.jpg 640px_360px 640px_0px
/CSG_GGeo/cvd1/60500/cxr_solid/cam_1/cxr_solid_r4@_.jpg 640px_360px 0px_360px
/CSG_GGeo/cvd1/70000/cxr_solid/cam_1/cxr_solid_r4@_.jpg 640px_360px 640px_360px
TODO: investigate this difference
OptiX supports multiple instance levels : IAS->IAS->GAS BUT: Simple two-level is faster : works in hardware RT Cores
SBT : Shader Binding Table
Flexibly binds together:
Hidden in OptiX 1-6 APIs
Optimization : deciding where to draw lines between:
Where those lines are drawn defines the AS
https://developer.nvidia.com/blog/best-practices-using-nvidia-rtx-ray-tracing/
https://developer.nvidia.com/blog/best-practices-using-nvidia-rtx-ray-tracing/
Advantages apply equally to acceleration structures
Equivalent Intersects -> same t
Local Frame Advantages
Geometry Instancing Advantages
Requirements
Outside/Inside Unions
dot(normal,rayDir) -> Enter/Exit
Complete Binary Tree, pick between pairs of nearest intersects:
UNION tA < tB | Enter B | Exit B | Miss B |
---|---|---|---|
Enter A | ReturnA | LoopA | ReturnA |
Exit A | ReturnA | ReturnB | ReturnA |
Miss A | ReturnB | ReturnB | ReturnMiss |
CSG Binary Tree
Primitives combined via binary operators
Simple by construction definition, implicit geometry.
CSG expressions
3D Parametric Ray : ray(t) = r0 + t rDir
Ray Geometry Intersection
How to pick exactly ?
In/On/Out transitions
Classical Roth diagram approach
Computational requirements:
BUT : High performance on GPU requires:
Classical approach not appropriate on GPU
Bit Twiddling Navigation
Geant4 solid -> CSG binary tree (leaf primitives, non-leaf operators, 4x4 transforms on any node)
Serialize to complete binary tree buffer:
Height 3 complete binary tree with level order indices:
                                                      depth  elevation
                      1                                 0        3
           10                    11                     1        2
      100       101         110       111               2        1
  1000 1001  1010 1011  1100 1101  1110 1111            3        0
postorder_next(i,elevation) = i & 1 ? i >> 1 : (i << elevation) + (1 << elevation) ; // from pattern of bits
Postorder tree traverse visits all nodes, starting from leftmost, such that children are visited prior to their parents.
fullTree = PACK( 1 << height, 1 >> 1 ) // leftmost, parent_of_root(=0) tranche.push(fullTree, ray.tmin) while (!tranche.empty) // stack of begin/end indices { begin, end, tmin <- tranche.pop ; node <- begin ; while( node != end ) // over tranche of postorder traversal { elevation = height - TREE_DEPTH(node) ; if(is_primitive(node)){ isect <- intersect_primitive(node, tmin) ; csg.push(isect) } else{ i_left, i_right = csg.pop, csg.pop // csg stack of intersect normals, t l_state = CLASSIFY(i_left, ray.direction, tmin) r_state = CLASSIFY(i_right, ray.direction, tmin) action = LUT(operator(node), leftIsCloser)(l_state, r_state) if( action is ReturnLeft/Right) csg.push(i_left or i_right) else if( action is LoopLeft/Right) { left = 2*node ; right = 2*node + 1 ; endTranche = PACK( node, end ); leftTranche = PACK( left << (elevation-1), right << (elevation-1) ) rightTranche = PACK( right << (elevation-1), node ) loopTranche = action ? leftTranche : rightTranche tranche.push(endTranche, tmin) tranche.push(loopTranche, tminAdvanced ) // subtree re-traversal with changed tmin break ; // to next tranche } } node <- postorder_next(node, elevation) // bit twiddling postorder } } isect = csg.pop(); // winning intersect
https://bitbucket.org/simoncblyth/opticks/src/tip/optixrap/cu/csg_intersect_boolean.h
Positive form CSG Trees
Apply deMorgan pushing negations down tree
End with only UNION, INTERSECT operators, and some complemented leaves.
COMMUTATIVE -> easily rearranged
1st step to allow balancing : Positivize : remove CSG difference di operators
... ... un cy un cy un cy un cy un cy di cy cy cy
... ... un cy un cy un cy un cy un cy in cy cy !cy