diff --git a/cuda/common.h b/cuda/common.h index 348d57c..2580a7f 100644 --- a/cuda/common.h +++ b/cuda/common.h @@ -65,5 +65,7 @@ struct Params float hf_ve; // vertical exaggeration int hf_tile_size; // tile dimension (e.g. 32) int hf_num_tiles_x; // number of tiles in X direction - int _pad0; // padding to 88 bytes + int _pad0; // padding for pointer alignment + // --- point cloud fields (offset 88) --- + float* point_colors; // per-point RGBA (4 floats per point, indexed by primitive_id) }; diff --git a/cuda/kernel.cu b/cuda/kernel.cu index 175ec3a..ca5c454 100644 --- a/cuda/kernel.cu +++ b/cuda/kernel.cu @@ -357,6 +357,42 @@ extern "C" __global__ void __intersection__heightfield() } +// --------------------------------------------------------------------------- +// Sphere closest-hit program (point cloud / splat rendering) +// --------------------------------------------------------------------------- +extern "C" __global__ void __closesthit__sphere() +{ + const float t_val = optixGetRayTmax(); + const unsigned int primIdx = optixGetPrimitiveIndex(); + + // Get sphere center in object space + float4 sphere_data[1]; + optixGetSphereData(sphere_data); + + // Transform sphere center from object to world space + float m[12]; + optixGetObjectToWorldTransformMatrix(m); + const float cx = m[0]*sphere_data[0].x + m[1]*sphere_data[0].y + m[ 2]*sphere_data[0].z + m[ 3]; + const float cy = m[4]*sphere_data[0].x + m[5]*sphere_data[0].y + m[ 6]*sphere_data[0].z + m[ 7]; + const float cz = m[8]*sphere_data[0].x + m[9]*sphere_data[0].y + m[10]*sphere_data[0].z + m[11]; + + // World-space hit point and normal + const float3 ray_o = optixGetWorldRayOrigin(); + const float3 ray_d = optixGetWorldRayDirection(); + float3 n = normalize(make_float3( + ray_o.x + ray_d.x * t_val - cx, + ray_o.y + ray_d.y * t_val - cy, + ray_o.z + ray_d.z * t_val - cz)); + + optixSetPayload_0(float_as_int(t_val)); + optixSetPayload_1(float_as_int(n.x)); + optixSetPayload_2(float_as_int(n.y)); + optixSetPayload_3(float_as_int(n.z)); + optixSetPayload_4(primIdx); + optixSetPayload_5(optixGetInstanceId()); +} + + extern "C" __global__ void __closesthit__heightfield() { const float t = optixGetRayTmax(); diff --git a/rtxpy/accessor.py b/rtxpy/accessor.py index bc88b2f..688cb8a 100644 --- a/rtxpy/accessor.py +++ b/rtxpy/accessor.py @@ -812,6 +812,137 @@ def place_mesh(self, mesh_source, positions, geometry_id=None, scale=1.0, return vertices, indices, transforms + def place_pointcloud(self, source, geometry_id='pointcloud', + point_size=1.0, color='elevation', + classification=None, returns=None, + bounds=None, subsample=1, max_points=None, + snap_to_terrain=False, colormap=None): + """ + Place a lidar point cloud in the scene as sphere primitives. + + Each point is rendered as an OptiX sphere with hardware-accelerated + ray-sphere intersection. Supports LAS/LAZ files, numpy arrays, + or callable data sources. + + Args: + source: One of: + - str: Path to LAS/LAZ/COPC file (requires laspy) + - ndarray: (N, 3) float32 array of XYZ coordinates + - callable: Function returning (centers_N3, attributes_dict) + geometry_id: Unique identifier for this geometry. Default 'pointcloud'. + point_size: Sphere radius in world units. Default 1.0. + Can be a scalar (uniform) or (N,) array (per-point). + color: Color mode. One of: + - 'elevation': Color by Z value using colormap + - 'intensity': Grayscale from LAS intensity + - 'classification': ASPRS standard colors + - 'rgb': Direct RGB from LAS file + - (r, g, b) tuple: Uniform solid color [0-1] + classification: Optional int or list of ASPRS codes to filter. + Common: 2=ground, 3-5=vegetation, 6=building, 9=water. + returns: Optional 'first', 'last', or 'all' to filter by return. + bounds: Optional (xmin, ymin, xmax, ymax) spatial crop. + subsample: Keep every Nth point (default 1 = all). + max_points: Maximum points to keep (random sample if exceeded). + snap_to_terrain: If True, replace Z with DEM elevation. + colormap: Optional (256, 3) float32 LUT for elevation color mode. + + Returns: + Dict with keys: + 'num_points': Number of points placed + 'geometry_id': The geometry ID used + 'bounds': (xmin, ymin, zmin, xmax, ymax, zmax) of placed points + """ + from .pointcloud import load_pointcloud, build_colors + + # Load and filter point cloud data + centers, attributes = load_pointcloud( + source, + classification=classification, + returns=returns, + bounds=bounds, + subsample=subsample, + max_points=max_points, + ) + + if len(centers) == 0: + return {'num_points': 0, 'geometry_id': geometry_id, 'bounds': None} + + terrain = self._obj.values + H, W = terrain.shape + + # Convert coordinates to terrain pixel space if needed + # Check if coordinates are in world/geo space vs pixel space + # by comparing ranges to terrain dimensions + x_range = centers[:, 0].max() - centers[:, 0].min() + y_range = centers[:, 1].max() - centers[:, 1].min() + + # Get pixel spacing from xarray coords + psx, psy = 1.0, 1.0 + if 'x' in self._obj.coords and len(self._obj.coords['x']) > 1: + x_coords = self._obj.coords['x'].values + psx = abs(float(x_coords[1] - x_coords[0])) + if 'y' in self._obj.coords and len(self._obj.coords['y']) > 1: + y_coords = self._obj.coords['y'].values + psy = abs(float(y_coords[1] - y_coords[0])) + + # If coords look like they're in a projected CRS (large values), + # convert to pixel space + if psx != 1.0 or psy != 1.0: + x_origin = float(self._obj.coords['x'].values[0]) + y_origin = float(self._obj.coords['y'].values[0]) + centers[:, 0] = (centers[:, 0] - x_origin) / psx + centers[:, 1] = (centers[:, 1] - y_origin) / psy + + # Snap Z to terrain if requested + if snap_to_terrain: + for i in range(len(centers)): + vx, vy = centers[i, 0], centers[i, 1] + z = self._bilinear_terrain_z(terrain, vx, vy, 1.0, 1.0) + centers[i, 2] = z + else: + # Scale Z by pixel spacing to match terrain units + if psx != 1.0: + centers[:, 2] = centers[:, 2] / psx + + # Build per-point colors + point_colors = build_colors( + centers, attributes, color_mode=color, colormap=colormap) + + # Build radii array + if np.isscalar(point_size): + radii = float(point_size) + else: + radii = np.asarray(point_size, dtype=np.float32) + + # Add sphere geometry to the RTX scene + result = self._rtx.add_sphere_geometry( + geometry_id, + centers.ravel(), + radii, + colors=point_colors.ravel(), + ) + + if result != 0: + raise RuntimeError(f"Failed to add sphere geometry '{geometry_id}'") + + # Set geometry color to a marker value so the render kernel + # knows to use per-point colors (alpha > 0 triggers solid color) + self._geometry_colors[geometry_id] = (0.5, 0.5, 0.5) + + point_bounds = ( + float(centers[:, 0].min()), float(centers[:, 1].min()), + float(centers[:, 2].min()), + float(centers[:, 0].max()), float(centers[:, 1].max()), + float(centers[:, 2].max()), + ) + + return { + 'num_points': len(centers), + 'geometry_id': geometry_id, + 'bounds': point_bounds, + } + _GEOJSON_PALETTE = [ (0.90, 0.22, 0.20), # red (0.20, 0.56, 0.90), # blue diff --git a/rtxpy/kernel.ptx b/rtxpy/kernel.ptx index b3bac86..df7cdd1 100644 --- a/rtxpy/kernel.ptx +++ b/rtxpy/kernel.ptx @@ -7,11 +7,11 @@ // .version 9.1 -.target sm_86 +.target sm_75 .address_size 64 // .globl __raygen__main -.const .align 8 .b8 params[88]; +.const .align 8 .b8 params[96]; .visible .entry __raygen__main() { @@ -767,6 +767,757 @@ $L__BB3_38: $L__BB3_40: ret; +} + // .globl __closesthit__sphere +.visible .entry __closesthit__sphere() +{ + .reg .pred %p<13>; + .reg .f32 %f<464>; + .reg .b32 %r<177>; + .reg .b64 %rd<128>; + + + // begin inline asm + call (%f164), _optix_get_ray_tmax, (); + // end inline asm + // begin inline asm + call (%r9), _optix_read_primitive_idx, (); + // end inline asm + // begin inline asm + call (%f165, %f166, %f167, %f168), _optix_get_sphere_data_current_hit, (); + // end inline asm + // begin inline asm + call (%r10), _optix_get_transform_list_size, (); + // end inline asm + setp.eq.s32 %p1, %r10, 0; + mov.f32 %f436, 0f3F800000; + mov.f32 %f435, 0f00000000; + mov.f32 %f437, %f435; + mov.f32 %f438, %f435; + mov.f32 %f431, %f435; + mov.f32 %f432, %f435; + mov.f32 %f433, %f436; + mov.f32 %f434, %f435; + mov.f32 %f439, %f435; + mov.f32 %f428, %f435; + mov.f32 %f429, %f435; + mov.f32 %f430, %f436; + @%p1 bra $L__BB4_18; + + // begin inline asm + call (%r11), _optix_get_transform_list_size, (); + // end inline asm + // begin inline asm + call (%f181), _optix_get_ray_time, (); + // end inline asm + setp.lt.s32 %p2, %r11, 1; + @%p2 bra $L__BB4_18; + + add.s32 %r175, %r11, 1; + mov.u32 %r176, 1; + +$L__BB4_3: + .pragma "nounroll"; + add.s32 %r13, %r175, -2; + // begin inline asm + call (%rd8), _optix_get_transform_list_handle, (%r13); + // end inline asm + // begin inline asm + call (%r14), _optix_get_transform_type_from_handle, (%rd8); + // end inline asm + or.b32 %r15, %r14, 1; + setp.eq.s32 %p3, %r15, 3; + @%p3 bra $L__BB4_9; + bra.uni $L__BB4_4; + +$L__BB4_9: + setp.eq.s32 %p6, %r14, 2; + @%p6 bra $L__BB4_13; + bra.uni $L__BB4_10; + +$L__BB4_13: + // begin inline asm + call (%rd80), _optix_get_matrix_motion_transform_from_handle, (%rd8); + // end inline asm + // begin inline asm + cvta.to.global.u64 %rd82, %rd80; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r103,%r104,%r105,%r106}, [%rd82]; + // end inline asm + add.s64 %rd86, %rd80, 16; + // begin inline asm + cvta.to.global.u64 %rd85, %rd86; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r107,%r108,%r109,%r110}, [%rd85]; + // end inline asm + add.s64 %rd89, %rd80, 32; + // begin inline asm + cvta.to.global.u64 %rd88, %rd89; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r111,%r112,%r113,%r114}, [%rd88]; + // end inline asm + add.s64 %rd92, %rd80, 48; + // begin inline asm + cvta.to.global.u64 %rd91, %rd92; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r115,%r116,%r117,%r118}, [%rd91]; + // end inline asm + add.s64 %rd95, %rd80, 64; + // begin inline asm + cvta.to.global.u64 %rd94, %rd95; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r119,%r120,%r121,%r122}, [%rd94]; + // end inline asm + add.s64 %rd98, %rd80, 80; + // begin inline asm + cvta.to.global.u64 %rd97, %rd98; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r123,%r124,%r125,%r126}, [%rd97]; + // end inline asm + add.s64 %rd101, %rd80, 96; + // begin inline asm + cvta.to.global.u64 %rd100, %rd101; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r127,%r128,%r129,%r130}, [%rd100]; + // end inline asm + add.s64 %rd104, %rd80, 112; + // begin inline asm + cvta.to.global.u64 %rd103, %rd104; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r131,%r132,%r133,%r134}, [%rd103]; + // end inline asm + mov.b32 %f302, %r106; + mov.b32 %f303, %r107; + and.b32 %r147, %r105, 65535; + add.s32 %r148, %r147, -1; + cvt.rn.f32.s32 %f304, %r148; + sub.ftz.f32 %f305, %f181, %f302; + sub.ftz.f32 %f306, %f303, %f302; + div.approx.ftz.f32 %f307, %f305, %f306; + mul.ftz.f32 %f308, %f307, %f304; + min.ftz.f32 %f309, %f304, %f308; + mov.f32 %f310, 0f00000000; + max.ftz.f32 %f311, %f310, %f309; + setp.num.ftz.f32 %p9, %f311, %f311; + selp.f32 %f312, %f311, 0f00000000, %p9; + cvt.rmi.ftz.f32.f32 %f313, %f312; + add.ftz.f32 %f314, %f304, 0fBF800000; + min.ftz.f32 %f315, %f313, %f314; + sub.ftz.f32 %f91, %f312, %f315; + cvt.rzi.ftz.s32.f32 %r149, %f315; + cvt.s64.s32 %rd7, %r149; + mul.wide.s32 %rd115, %r149, 48; + add.s64 %rd107, %rd89, %rd115; + // begin inline asm + cvta.to.global.u64 %rd106, %rd107; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r135,%r136,%r137,%r138}, [%rd106]; + // end inline asm + mov.b32 %f430, %r135; + mov.b32 %f429, %r136; + mov.b32 %f428, %r137; + mov.b32 %f439, %r138; + add.s64 %rd110, %rd107, 16; + // begin inline asm + cvta.to.global.u64 %rd109, %rd110; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r139,%r140,%r141,%r142}, [%rd109]; + // end inline asm + mov.b32 %f434, %r139; + mov.b32 %f433, %r140; + mov.b32 %f432, %r141; + mov.b32 %f431, %r142; + add.s64 %rd113, %rd107, 32; + // begin inline asm + cvta.to.global.u64 %rd112, %rd113; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r143,%r144,%r145,%r146}, [%rd112]; + // end inline asm + mov.b32 %f438, %r143; + mov.b32 %f437, %r144; + mov.b32 %f436, %r145; + mov.b32 %f435, %r146; + setp.leu.ftz.f32 %p10, %f91, 0f00000000; + @%p10 bra $L__BB4_15; + + mov.f32 %f316, 0f3F800000; + sub.ftz.f32 %f317, %f316, %f91; + mul.lo.s64 %rd125, %rd7, 48; + add.s64 %rd126, %rd80, %rd125; + add.s64 %rd117, %rd126, 80; + // begin inline asm + cvta.to.global.u64 %rd116, %rd117; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r150,%r151,%r152,%r153}, [%rd116]; + // end inline asm + mov.b32 %f318, %r150; + mov.b32 %f319, %r151; + mov.b32 %f320, %r152; + mov.b32 %f321, %r153; + mul.ftz.f32 %f322, %f91, %f318; + mul.ftz.f32 %f323, %f91, %f319; + mul.ftz.f32 %f324, %f91, %f320; + mul.ftz.f32 %f325, %f91, %f321; + fma.rn.ftz.f32 %f430, %f317, %f430, %f322; + fma.rn.ftz.f32 %f429, %f317, %f429, %f323; + fma.rn.ftz.f32 %f428, %f317, %f428, %f324; + fma.rn.ftz.f32 %f439, %f317, %f439, %f325; + add.s64 %rd120, %rd126, 96; + // begin inline asm + cvta.to.global.u64 %rd119, %rd120; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r154,%r155,%r156,%r157}, [%rd119]; + // end inline asm + mov.b32 %f326, %r154; + mov.b32 %f327, %r155; + mov.b32 %f328, %r156; + mov.b32 %f329, %r157; + mul.ftz.f32 %f330, %f91, %f326; + mul.ftz.f32 %f331, %f91, %f327; + mul.ftz.f32 %f332, %f91, %f328; + mul.ftz.f32 %f333, %f91, %f329; + fma.rn.ftz.f32 %f434, %f317, %f434, %f330; + fma.rn.ftz.f32 %f433, %f317, %f433, %f331; + fma.rn.ftz.f32 %f432, %f317, %f432, %f332; + fma.rn.ftz.f32 %f431, %f317, %f431, %f333; + add.s64 %rd123, %rd126, 112; + // begin inline asm + cvta.to.global.u64 %rd122, %rd123; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r158,%r159,%r160,%r161}, [%rd122]; + // end inline asm + mov.b32 %f334, %r158; + mov.b32 %f335, %r159; + mov.b32 %f336, %r160; + mov.b32 %f337, %r161; + mul.ftz.f32 %f338, %f91, %f334; + mul.ftz.f32 %f339, %f91, %f335; + mul.ftz.f32 %f340, %f91, %f336; + mul.ftz.f32 %f341, %f91, %f337; + fma.rn.ftz.f32 %f438, %f317, %f438, %f338; + fma.rn.ftz.f32 %f437, %f317, %f437, %f339; + fma.rn.ftz.f32 %f436, %f317, %f436, %f340; + fma.rn.ftz.f32 %f435, %f317, %f435, %f341; + bra.uni $L__BB4_15; + +$L__BB4_4: + mov.f32 %f428, 0f00000000; + mov.f32 %f430, 0f3F800000; + setp.eq.s32 %p4, %r14, 4; + @%p4 bra $L__BB4_7; + + setp.ne.s32 %p5, %r14, 1; + mov.f32 %f429, %f428; + mov.f32 %f431, %f428; + mov.f32 %f432, %f428; + mov.f32 %f433, %f430; + mov.f32 %f434, %f428; + mov.f32 %f435, %f428; + mov.f32 %f436, %f430; + mov.f32 %f437, %f428; + mov.f32 %f438, %f428; + mov.f32 %f439, %f428; + @%p5 bra $L__BB4_15; + + // begin inline asm + call (%rd10), _optix_get_static_transform_from_handle, (%rd8); + // end inline asm + add.s64 %rd127, %rd10, 16; + bra.uni $L__BB4_8; + +$L__BB4_10: + // begin inline asm + call (%rd23), _optix_get_srt_motion_transform_from_handle, (%rd8); + // end inline asm + // begin inline asm + cvta.to.global.u64 %rd25, %rd23; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r28,%r29,%r30,%r31}, [%rd25]; + // end inline asm + add.s64 %rd29, %rd23, 16; + // begin inline asm + cvta.to.global.u64 %rd28, %rd29; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r32,%r33,%r34,%r35}, [%rd28]; + // end inline asm + add.s64 %rd32, %rd23, 32; + // begin inline asm + cvta.to.global.u64 %rd31, %rd32; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r36,%r37,%r38,%r39}, [%rd31]; + // end inline asm + add.s64 %rd35, %rd23, 48; + // begin inline asm + cvta.to.global.u64 %rd34, %rd35; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r40,%r41,%r42,%r43}, [%rd34]; + // end inline asm + add.s64 %rd38, %rd23, 64; + // begin inline asm + cvta.to.global.u64 %rd37, %rd38; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r44,%r45,%r46,%r47}, [%rd37]; + // end inline asm + add.s64 %rd41, %rd23, 80; + // begin inline asm + cvta.to.global.u64 %rd40, %rd41; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r48,%r49,%r50,%r51}, [%rd40]; + // end inline asm + add.s64 %rd44, %rd23, 96; + // begin inline asm + cvta.to.global.u64 %rd43, %rd44; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r52,%r53,%r54,%r55}, [%rd43]; + // end inline asm + add.s64 %rd47, %rd23, 112; + // begin inline asm + cvta.to.global.u64 %rd46, %rd47; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r56,%r57,%r58,%r59}, [%rd46]; + // end inline asm + add.s64 %rd50, %rd23, 128; + // begin inline asm + cvta.to.global.u64 %rd49, %rd50; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r60,%r61,%r62,%r63}, [%rd49]; + // end inline asm + add.s64 %rd53, %rd23, 144; + // begin inline asm + cvta.to.global.u64 %rd52, %rd53; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r64,%r65,%r66,%r67}, [%rd52]; + // end inline asm + mov.b32 %f196, %r31; + mov.b32 %f197, %r32; + and.b32 %r84, %r30, 65535; + add.s32 %r85, %r84, -1; + cvt.rn.f32.s32 %f198, %r85; + sub.ftz.f32 %f199, %f181, %f196; + sub.ftz.f32 %f200, %f197, %f196; + div.approx.ftz.f32 %f201, %f199, %f200; + mul.ftz.f32 %f202, %f201, %f198; + min.ftz.f32 %f203, %f198, %f202; + mov.f32 %f204, 0f00000000; + max.ftz.f32 %f205, %f204, %f203; + setp.num.ftz.f32 %p7, %f205, %f205; + selp.f32 %f206, %f205, 0f00000000, %p7; + cvt.rmi.ftz.f32.f32 %f207, %f206; + add.ftz.f32 %f208, %f198, 0fBF800000; + min.ftz.f32 %f209, %f207, %f208; + sub.ftz.f32 %f30, %f206, %f209; + cvt.rzi.ftz.s32.f32 %r86, %f209; + mul.wide.s32 %rd67, %r86, 64; + add.s64 %rd56, %rd32, %rd67; + // begin inline asm + cvta.to.global.u64 %rd55, %rd56; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r68,%r69,%r70,%r71}, [%rd55]; + // end inline asm + mov.b32 %f412, %r68; + mov.b32 %f413, %r69; + mov.b32 %f414, %r70; + mov.b32 %f415, %r71; + add.s64 %rd59, %rd56, 16; + // begin inline asm + cvta.to.global.u64 %rd58, %rd59; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r72,%r73,%r74,%r75}, [%rd58]; + // end inline asm + mov.b32 %f416, %r72; + mov.b32 %f417, %r73; + mov.b32 %f418, %r74; + mov.b32 %f419, %r75; + add.s64 %rd62, %rd56, 32; + // begin inline asm + cvta.to.global.u64 %rd61, %rd62; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r76,%r77,%r78,%r79}, [%rd61]; + // end inline asm + mov.b32 %f420, %r76; + mov.b32 %f421, %r77; + mov.b32 %f422, %r78; + mov.b32 %f423, %r79; + add.s64 %rd65, %rd56, 48; + // begin inline asm + cvta.to.global.u64 %rd64, %rd65; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r80,%r81,%r82,%r83}, [%rd64]; + // end inline asm + mov.b32 %f424, %r80; + mov.b32 %f425, %r81; + mov.b32 %f426, %r82; + mov.b32 %f427, %r83; + setp.leu.ftz.f32 %p8, %f30, 0f00000000; + @%p8 bra $L__BB4_12; + + mov.f32 %f210, 0f3F800000; + sub.ftz.f32 %f211, %f210, %f30; + add.s64 %rd69, %rd56, 64; + // begin inline asm + cvta.to.global.u64 %rd68, %rd69; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r87,%r88,%r89,%r90}, [%rd68]; + // end inline asm + mov.b32 %f212, %r87; + mov.b32 %f213, %r88; + mov.b32 %f214, %r89; + mov.b32 %f215, %r90; + mul.ftz.f32 %f216, %f30, %f212; + mul.ftz.f32 %f217, %f30, %f213; + mul.ftz.f32 %f218, %f30, %f214; + mul.ftz.f32 %f219, %f30, %f215; + fma.rn.ftz.f32 %f412, %f211, %f412, %f216; + fma.rn.ftz.f32 %f413, %f211, %f413, %f217; + fma.rn.ftz.f32 %f414, %f211, %f414, %f218; + fma.rn.ftz.f32 %f415, %f211, %f415, %f219; + add.s64 %rd72, %rd56, 80; + // begin inline asm + cvta.to.global.u64 %rd71, %rd72; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r91,%r92,%r93,%r94}, [%rd71]; + // end inline asm + mov.b32 %f220, %r91; + mov.b32 %f221, %r92; + mov.b32 %f222, %r93; + mov.b32 %f223, %r94; + mul.ftz.f32 %f224, %f30, %f220; + mul.ftz.f32 %f225, %f30, %f221; + mul.ftz.f32 %f226, %f30, %f222; + mul.ftz.f32 %f227, %f30, %f223; + fma.rn.ftz.f32 %f416, %f211, %f416, %f224; + fma.rn.ftz.f32 %f417, %f211, %f417, %f225; + fma.rn.ftz.f32 %f418, %f211, %f418, %f226; + fma.rn.ftz.f32 %f419, %f211, %f419, %f227; + add.s64 %rd75, %rd56, 96; + // begin inline asm + cvta.to.global.u64 %rd74, %rd75; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r95,%r96,%r97,%r98}, [%rd74]; + // end inline asm + mov.b32 %f228, %r95; + mov.b32 %f229, %r96; + mov.b32 %f230, %r97; + mov.b32 %f231, %r98; + mul.ftz.f32 %f232, %f30, %f228; + mul.ftz.f32 %f233, %f30, %f229; + mul.ftz.f32 %f234, %f30, %f230; + mul.ftz.f32 %f235, %f30, %f231; + fma.rn.ftz.f32 %f420, %f211, %f420, %f232; + fma.rn.ftz.f32 %f236, %f211, %f421, %f233; + fma.rn.ftz.f32 %f237, %f211, %f422, %f234; + fma.rn.ftz.f32 %f238, %f211, %f423, %f235; + add.s64 %rd78, %rd56, 112; + // begin inline asm + cvta.to.global.u64 %rd77, %rd78; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r99,%r100,%r101,%r102}, [%rd77]; + // end inline asm + mov.b32 %f239, %r99; + mov.b32 %f240, %r100; + mov.b32 %f241, %r101; + mov.b32 %f242, %r102; + mul.ftz.f32 %f243, %f30, %f239; + mul.ftz.f32 %f244, %f30, %f240; + mul.ftz.f32 %f245, %f30, %f241; + mul.ftz.f32 %f246, %f30, %f242; + fma.rn.ftz.f32 %f247, %f211, %f424, %f243; + fma.rn.ftz.f32 %f425, %f211, %f425, %f244; + fma.rn.ftz.f32 %f426, %f211, %f426, %f245; + fma.rn.ftz.f32 %f427, %f211, %f427, %f246; + mul.ftz.f32 %f248, %f237, %f237; + fma.rn.ftz.f32 %f249, %f236, %f236, %f248; + fma.rn.ftz.f32 %f250, %f238, %f238, %f249; + fma.rn.ftz.f32 %f251, %f247, %f247, %f250; + rsqrt.approx.ftz.f32 %f252, %f251; + mul.ftz.f32 %f421, %f236, %f252; + mul.ftz.f32 %f422, %f237, %f252; + mul.ftz.f32 %f423, %f238, %f252; + mul.ftz.f32 %f424, %f252, %f247; + +$L__BB4_12: + mul.ftz.f32 %f253, %f422, %f422; + mul.ftz.f32 %f254, %f421, %f421; + sub.ftz.f32 %f255, %f254, %f253; + mul.ftz.f32 %f256, %f423, %f423; + sub.ftz.f32 %f257, %f255, %f256; + fma.rn.ftz.f32 %f258, %f424, %f424, %f257; + mul.ftz.f32 %f259, %f423, %f424; + mul.ftz.f32 %f260, %f421, %f422; + sub.ftz.f32 %f261, %f260, %f259; + add.ftz.f32 %f262, %f261, %f261; + mul.ftz.f32 %f263, %f422, %f424; + mul.ftz.f32 %f264, %f421, %f423; + add.ftz.f32 %f265, %f264, %f263; + add.ftz.f32 %f266, %f265, %f265; + add.ftz.f32 %f267, %f260, %f259; + add.ftz.f32 %f268, %f267, %f267; + sub.ftz.f32 %f269, %f253, %f254; + sub.ftz.f32 %f270, %f269, %f256; + fma.rn.ftz.f32 %f271, %f424, %f424, %f270; + mul.ftz.f32 %f272, %f421, %f424; + mul.ftz.f32 %f273, %f422, %f423; + sub.ftz.f32 %f274, %f273, %f272; + add.ftz.f32 %f275, %f274, %f274; + sub.ftz.f32 %f276, %f264, %f263; + add.ftz.f32 %f277, %f276, %f276; + add.ftz.f32 %f278, %f273, %f272; + add.ftz.f32 %f279, %f278, %f278; + neg.ftz.f32 %f280, %f254; + sub.ftz.f32 %f281, %f280, %f253; + add.ftz.f32 %f282, %f281, %f256; + fma.rn.ftz.f32 %f283, %f424, %f424, %f282; + mul.ftz.f32 %f284, %f418, %f262; + fma.rn.ftz.f32 %f285, %f415, %f258, %f284; + fma.rn.ftz.f32 %f286, %f420, %f266, %f285; + add.ftz.f32 %f439, %f425, %f286; + mul.ftz.f32 %f287, %f415, %f268; + fma.rn.ftz.f32 %f288, %f418, %f271, %f287; + fma.rn.ftz.f32 %f289, %f420, %f275, %f288; + add.ftz.f32 %f431, %f426, %f289; + mul.ftz.f32 %f290, %f418, %f279; + fma.rn.ftz.f32 %f291, %f415, %f277, %f290; + fma.rn.ftz.f32 %f292, %f420, %f283, %f291; + add.ftz.f32 %f435, %f427, %f292; + mul.ftz.f32 %f293, %f417, %f262; + fma.rn.ftz.f32 %f294, %f414, %f258, %f293; + fma.rn.ftz.f32 %f428, %f419, %f266, %f294; + mul.ftz.f32 %f295, %f414, %f268; + fma.rn.ftz.f32 %f296, %f417, %f271, %f295; + fma.rn.ftz.f32 %f432, %f419, %f275, %f296; + mul.ftz.f32 %f297, %f417, %f279; + fma.rn.ftz.f32 %f298, %f414, %f277, %f297; + fma.rn.ftz.f32 %f436, %f419, %f283, %f298; + mul.ftz.f32 %f299, %f416, %f262; + fma.rn.ftz.f32 %f429, %f413, %f258, %f299; + mul.ftz.f32 %f300, %f413, %f268; + fma.rn.ftz.f32 %f433, %f416, %f271, %f300; + mul.ftz.f32 %f301, %f416, %f279; + fma.rn.ftz.f32 %f437, %f413, %f277, %f301; + mul.ftz.f32 %f430, %f412, %f258; + mul.ftz.f32 %f434, %f412, %f268; + mul.ftz.f32 %f438, %f412, %f277; + bra.uni $L__BB4_15; + +$L__BB4_7: + // begin inline asm + call (%rd127), _optix_get_instance_transform_from_handle, (%rd8); + // end inline asm + +$L__BB4_8: + // begin inline asm + cvta.to.global.u64 %rd14, %rd127; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r16,%r17,%r18,%r19}, [%rd14]; + // end inline asm + mov.b32 %f430, %r16; + mov.b32 %f429, %r17; + mov.b32 %f428, %r18; + mov.b32 %f439, %r19; + add.s64 %rd18, %rd127, 16; + // begin inline asm + cvta.to.global.u64 %rd17, %rd18; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r20,%r21,%r22,%r23}, [%rd17]; + // end inline asm + mov.b32 %f434, %r20; + mov.b32 %f433, %r21; + mov.b32 %f432, %r22; + mov.b32 %f431, %r23; + add.s64 %rd21, %rd127, 32; + // begin inline asm + cvta.to.global.u64 %rd20, %rd21; + // end inline asm + // begin inline asm + ld.global.v4.u32 {%r24,%r25,%r26,%r27}, [%rd20]; + // end inline asm + mov.b32 %f438, %r24; + mov.b32 %f437, %r25; + mov.b32 %f436, %r26; + mov.b32 %f435, %r27; + +$L__BB4_15: + setp.eq.s32 %p11, %r176, 1; + @%p11 bra $L__BB4_17; + + mul.ftz.f32 %f342, %f408, %f430; + fma.rn.ftz.f32 %f343, %f404, %f429, %f342; + fma.rn.ftz.f32 %f128, %f400, %f428, %f343; + mul.ftz.f32 %f344, %f409, %f430; + fma.rn.ftz.f32 %f345, %f405, %f429, %f344; + fma.rn.ftz.f32 %f129, %f401, %f428, %f345; + mul.ftz.f32 %f346, %f410, %f430; + fma.rn.ftz.f32 %f347, %f406, %f429, %f346; + fma.rn.ftz.f32 %f130, %f402, %f428, %f347; + mul.ftz.f32 %f348, %f411, %f430; + fma.rn.ftz.f32 %f349, %f407, %f429, %f348; + fma.rn.ftz.f32 %f350, %f403, %f428, %f349; + add.ftz.f32 %f439, %f350, %f439; + mul.ftz.f32 %f351, %f408, %f434; + fma.rn.ftz.f32 %f352, %f404, %f433, %f351; + fma.rn.ftz.f32 %f132, %f400, %f432, %f352; + mul.ftz.f32 %f353, %f409, %f434; + fma.rn.ftz.f32 %f354, %f405, %f433, %f353; + fma.rn.ftz.f32 %f133, %f401, %f432, %f354; + mul.ftz.f32 %f355, %f410, %f434; + fma.rn.ftz.f32 %f356, %f406, %f433, %f355; + fma.rn.ftz.f32 %f134, %f402, %f432, %f356; + mul.ftz.f32 %f357, %f411, %f434; + fma.rn.ftz.f32 %f358, %f407, %f433, %f357; + fma.rn.ftz.f32 %f359, %f403, %f432, %f358; + add.ftz.f32 %f431, %f431, %f359; + mul.ftz.f32 %f360, %f408, %f438; + fma.rn.ftz.f32 %f361, %f404, %f437, %f360; + fma.rn.ftz.f32 %f136, %f400, %f436, %f361; + mul.ftz.f32 %f362, %f409, %f438; + fma.rn.ftz.f32 %f363, %f405, %f437, %f362; + fma.rn.ftz.f32 %f137, %f401, %f436, %f363; + mul.ftz.f32 %f364, %f410, %f438; + fma.rn.ftz.f32 %f365, %f406, %f437, %f364; + fma.rn.ftz.f32 %f138, %f402, %f436, %f365; + mul.ftz.f32 %f366, %f411, %f438; + fma.rn.ftz.f32 %f367, %f407, %f437, %f366; + fma.rn.ftz.f32 %f368, %f403, %f436, %f367; + add.ftz.f32 %f435, %f435, %f368; + mov.f32 %f428, %f130; + mov.f32 %f429, %f129; + mov.f32 %f430, %f128; + mov.f32 %f432, %f134; + mov.f32 %f433, %f133; + mov.f32 %f434, %f132; + mov.f32 %f436, %f138; + mov.f32 %f437, %f137; + mov.f32 %f438, %f136; + +$L__BB4_17: + add.s32 %r176, %r176, -1; + add.s32 %r175, %r175, -1; + setp.gt.s32 %p12, %r175, 1; + mov.f32 %f400, %f438; + mov.f32 %f401, %f437; + mov.f32 %f402, %f436; + mov.f32 %f403, %f435; + mov.f32 %f404, %f434; + mov.f32 %f405, %f433; + mov.f32 %f406, %f432; + mov.f32 %f407, %f431; + mov.f32 %f408, %f430; + mov.f32 %f409, %f429; + mov.f32 %f410, %f428; + mov.f32 %f411, %f439; + @%p12 bra $L__BB4_3; + +$L__BB4_18: + mul.ftz.f32 %f375, %f165, %f430; + fma.rn.ftz.f32 %f376, %f166, %f429, %f375; + fma.rn.ftz.f32 %f377, %f167, %f428, %f376; + add.ftz.f32 %f378, %f439, %f377; + mul.ftz.f32 %f379, %f165, %f434; + fma.rn.ftz.f32 %f380, %f166, %f433, %f379; + fma.rn.ftz.f32 %f381, %f167, %f432, %f380; + add.ftz.f32 %f382, %f431, %f381; + mul.ftz.f32 %f383, %f165, %f438; + fma.rn.ftz.f32 %f384, %f166, %f437, %f383; + fma.rn.ftz.f32 %f385, %f167, %f436, %f384; + add.ftz.f32 %f386, %f435, %f385; + // begin inline asm + call (%f369), _optix_get_world_ray_origin_x, (); + // end inline asm + // begin inline asm + call (%f370), _optix_get_world_ray_origin_y, (); + // end inline asm + // begin inline asm + call (%f371), _optix_get_world_ray_origin_z, (); + // end inline asm + // begin inline asm + call (%f372), _optix_get_world_ray_direction_x, (); + // end inline asm + // begin inline asm + call (%f373), _optix_get_world_ray_direction_y, (); + // end inline asm + // begin inline asm + call (%f374), _optix_get_world_ray_direction_z, (); + // end inline asm + fma.rn.ftz.f32 %f387, %f164, %f372, %f369; + sub.ftz.f32 %f388, %f387, %f378; + fma.rn.ftz.f32 %f389, %f164, %f373, %f370; + sub.ftz.f32 %f390, %f389, %f382; + fma.rn.ftz.f32 %f391, %f164, %f374, %f371; + sub.ftz.f32 %f392, %f391, %f386; + mul.ftz.f32 %f393, %f390, %f390; + fma.rn.ftz.f32 %f394, %f388, %f388, %f393; + fma.rn.ftz.f32 %f395, %f392, %f392, %f394; + rsqrt.approx.ftz.f32 %f396, %f395; + mul.ftz.f32 %f397, %f388, %f396; + mul.ftz.f32 %f398, %f390, %f396; + mul.ftz.f32 %f399, %f396, %f392; + mov.b32 %r163, %f164; + mov.u32 %r162, 0; + // begin inline asm + call _optix_set_payload, (%r162, %r163); + // end inline asm + mov.b32 %r165, %f397; + mov.u32 %r164, 1; + // begin inline asm + call _optix_set_payload, (%r164, %r165); + // end inline asm + mov.b32 %r167, %f398; + mov.u32 %r166, 2; + // begin inline asm + call _optix_set_payload, (%r166, %r167); + // end inline asm + mov.b32 %r169, %f399; + mov.u32 %r168, 3; + // begin inline asm + call _optix_set_payload, (%r168, %r169); + // end inline asm + mov.u32 %r170, 4; + // begin inline asm + call _optix_set_payload, (%r170, %r9); + // end inline asm + // begin inline asm + call (%r172), _optix_read_instance_id, (); + // end inline asm + mov.u32 %r173, 5; + // begin inline asm + call _optix_set_payload, (%r173, %r172); + // end inline asm + ret; + } // .globl __closesthit__heightfield .visible .entry __closesthit__heightfield() diff --git a/rtxpy/pointcloud.py b/rtxpy/pointcloud.py new file mode 100644 index 0000000..d23398d --- /dev/null +++ b/rtxpy/pointcloud.py @@ -0,0 +1,259 @@ +""" +Point cloud loading and processing for rtxpy. + +Supports LAS/LAZ files via laspy, numpy arrays, and callable data sources. +Provides filtering by classification, return number, spatial bounds, +subsampling, and color mode generation. +""" + +import numpy as np + +# ASPRS LiDAR classification codes → default RGBA colors +CLASSIFICATION_COLORS = { + 0: (0.6, 0.6, 0.6, 1.0), # Never classified (gray) + 1: (0.5, 0.5, 0.5, 1.0), # Unassigned (gray) + 2: (0.6, 0.4, 0.2, 1.0), # Ground (brown) + 3: (0.1, 0.6, 0.1, 1.0), # Low vegetation (light green) + 4: (0.0, 0.5, 0.0, 1.0), # Medium vegetation (green) + 5: (0.0, 0.35, 0.0, 1.0), # High vegetation (dark green) + 6: (0.8, 0.2, 0.2, 1.0), # Building (red) + 7: (0.5, 0.5, 0.5, 1.0), # Low point / noise (gray) + 8: (0.5, 0.5, 0.5, 1.0), # Reserved + 9: (0.2, 0.4, 0.8, 1.0), # Water (blue) + 10: (0.7, 0.7, 0.3, 1.0), # Rail (yellow) + 11: (0.3, 0.3, 0.3, 1.0), # Road surface (dark gray) + 12: (0.5, 0.5, 0.5, 1.0), # Reserved + 13: (0.9, 0.9, 0.0, 1.0), # Wire - guard (yellow) + 14: (0.9, 0.6, 0.0, 1.0), # Wire - conductor (orange) + 15: (0.7, 0.7, 0.7, 1.0), # Transmission tower (light gray) + 16: (0.9, 0.9, 0.0, 1.0), # Wire - connector (yellow) + 17: (0.4, 0.4, 0.8, 1.0), # Bridge deck (blue-gray) + 18: (0.5, 0.5, 0.5, 1.0), # High noise +} + + +def load_pointcloud(source, classification=None, returns=None, + bounds=None, subsample=1, max_points=None): + """ + Load a point cloud from a file path or numpy array. + + Args: + source: One of: + - str: Path to LAS/LAZ/COPC file (requires laspy) + - ndarray: (N, 3) float32 array of XYZ coordinates + - callable: Function returning (centers_N3, attributes_dict) + classification: Optional int or list of ints to filter by ASPRS class. + returns: Optional 'first', 'last', or 'all' to filter by return number. + bounds: Optional (xmin, ymin, xmax, ymax) spatial crop. + subsample: Keep every Nth point (default 1 = all). + max_points: Maximum number of points to keep (random sample if exceeded). + + Returns: + Tuple of (centers, attributes) where: + - centers: (N, 3) float32 array of XYZ coordinates + - attributes: dict with optional keys: + 'intensity': (N,) float32 [0, 1] + 'classification': (N,) int32 + 'rgb': (N, 3) float32 [0, 1] + 'return_number': (N,) int32 + 'number_of_returns': (N,) int32 + """ + if callable(source) and not isinstance(source, (str, np.ndarray)): + centers, attributes = source() + centers = np.asarray(centers, dtype=np.float32) + elif isinstance(source, np.ndarray): + centers = np.asarray(source, dtype=np.float32) + if centers.ndim == 1: + centers = centers.reshape(-1, 3) + attributes = {} + elif isinstance(source, str): + centers, attributes = _load_las(source) + else: + raise TypeError(f"Unsupported source type: {type(source)}") + + # Apply filters + mask = np.ones(len(centers), dtype=bool) + + if classification is not None: + if 'classification' in attributes: + if isinstance(classification, int): + classification = [classification] + cls = attributes['classification'] + mask &= np.isin(cls, classification) + + if returns is not None and 'return_number' in attributes: + rn = attributes['return_number'] + nr = attributes.get('number_of_returns', rn) + if returns == 'first': + mask &= rn == 1 + elif returns == 'last': + mask &= rn == nr + + if bounds is not None: + xmin, ymin, xmax, ymax = bounds + mask &= ((centers[:, 0] >= xmin) & (centers[:, 0] <= xmax) & + (centers[:, 1] >= ymin) & (centers[:, 1] <= ymax)) + + # Apply mask + if not mask.all(): + centers = centers[mask] + attributes = {k: v[mask] for k, v in attributes.items()} + + # Subsample + if subsample > 1: + centers = centers[::subsample] + attributes = {k: v[::subsample] for k, v in attributes.items()} + + # Cap total points + if max_points is not None and len(centers) > max_points: + rng = np.random.default_rng(42) + idx = rng.choice(len(centers), max_points, replace=False) + idx.sort() + centers = centers[idx] + attributes = {k: v[idx] for k, v in attributes.items()} + + return centers, attributes + + +def _load_las(filepath): + """Load a LAS/LAZ file using laspy.""" + try: + import laspy + except ImportError: + raise ImportError( + "laspy is required for LAS/LAZ file loading. " + "Install with: pip install laspy[lazrs]" + ) + + las = laspy.read(filepath) + + centers = np.column_stack([ + np.asarray(las.x, dtype=np.float64), + np.asarray(las.y, dtype=np.float64), + np.asarray(las.z, dtype=np.float64), + ]).astype(np.float32) + + attributes = {} + + # Intensity (normalize to [0, 1]) + if hasattr(las, 'intensity'): + intensity = np.asarray(las.intensity, dtype=np.float32) + max_val = intensity.max() + if max_val > 0: + intensity = intensity / max_val + attributes['intensity'] = intensity + + # Classification + if hasattr(las, 'classification'): + attributes['classification'] = np.asarray( + las.classification, dtype=np.int32) + + # Return number + if hasattr(las, 'return_number'): + attributes['return_number'] = np.asarray( + las.return_number, dtype=np.int32) + + if hasattr(las, 'number_of_returns'): + attributes['number_of_returns'] = np.asarray( + las.number_of_returns, dtype=np.int32) + + # RGB color (LAS point formats 2, 3, 5, 7, 8, 10) + if hasattr(las, 'red') and hasattr(las, 'green') and hasattr(las, 'blue'): + r = np.asarray(las.red, dtype=np.float32) + g = np.asarray(las.green, dtype=np.float32) + b = np.asarray(las.blue, dtype=np.float32) + # LAS stores 16-bit RGB (0-65535) + max_rgb = max(r.max(), g.max(), b.max(), 1.0) + if max_rgb > 255: + r /= 65535.0 + g /= 65535.0 + b /= 65535.0 + elif max_rgb > 1: + r /= 255.0 + g /= 255.0 + b /= 255.0 + attributes['rgb'] = np.column_stack([r, g, b]) + + return centers, attributes + + +def build_colors(centers, attributes, color_mode='elevation', + colormap=None): + """ + Build per-point RGBA color array from point cloud attributes. + + Args: + centers: (N, 3) float32 XYZ coordinates. + attributes: Dict of per-point attributes from load_pointcloud(). + color_mode: One of: + - 'elevation': Color by Z value using colormap + - 'intensity': Grayscale from intensity attribute + - 'classification': ASPRS standard colors + - 'rgb': Direct RGB from LAS file + - tuple (r, g, b): Uniform solid color + colormap: Optional (256, 3) float32 lookup table for elevation mode. + If None, uses a default terrain colormap. + + Returns: + (N, 4) float32 RGBA array with values in [0, 1]. + """ + n = len(centers) + colors = np.ones((n, 4), dtype=np.float32) # default alpha = 1.0 + + if isinstance(color_mode, (tuple, list)): + # Uniform solid color + colors[:, 0] = color_mode[0] + colors[:, 1] = color_mode[1] + colors[:, 2] = color_mode[2] + + elif color_mode == 'elevation': + z = centers[:, 2] + z_min, z_max = np.nanmin(z), np.nanmax(z) + z_range = z_max - z_min + if z_range < 1e-6: + z_range = 1.0 + norm = np.clip((z - z_min) / z_range, 0.0, 1.0) + + if colormap is not None: + idx = (norm * 255).astype(np.int32) + idx = np.clip(idx, 0, 255) + colors[:, :3] = colormap[idx] + else: + # Default terrain ramp: blue → green → yellow → red → white + colors[:, 0] = np.clip(norm * 3.0 - 1.0, 0, 1) + colors[:, 1] = np.clip(1.0 - abs(norm * 3.0 - 1.5) * 1.5, 0, 1) + colors[:, 2] = np.clip(1.0 - norm * 2.0, 0, 1) + + elif color_mode == 'intensity': + if 'intensity' in attributes: + intensity = attributes['intensity'] + colors[:, 0] = intensity + colors[:, 1] = intensity + colors[:, 2] = intensity + else: + colors[:, :3] = 0.5 # fallback gray + + elif color_mode == 'classification': + if 'classification' in attributes: + cls = attributes['classification'] + for code, rgba in CLASSIFICATION_COLORS.items(): + mask = cls == code + if mask.any(): + colors[mask, 0] = rgba[0] + colors[mask, 1] = rgba[1] + colors[mask, 2] = rgba[2] + colors[mask, 3] = rgba[3] + # Unknown classes → default gray + unknown = ~np.isin(cls, list(CLASSIFICATION_COLORS.keys())) + if unknown.any(): + colors[unknown, :3] = 0.5 + else: + colors[:, :3] = 0.5 + + elif color_mode == 'rgb': + if 'rgb' in attributes: + colors[:, :3] = attributes['rgb'] + else: + colors[:, :3] = 0.5 + + return colors diff --git a/rtxpy/rtx.py b/rtxpy/rtx.py index c2fe248..04b7d6b 100644 --- a/rtxpy/rtx.py +++ b/rtxpy/rtx.py @@ -42,6 +42,7 @@ class _GASEntry: num_triangles: int = 0 is_curve: bool = False # True for round curve tube GAS is_heightfield: bool = False # True for heightfield custom primitive GAS + is_sphere: bool = False # True for sphere primitive GAS (point clouds) # ----------------------------------------------------------------------------- @@ -167,6 +168,9 @@ def __init__(self): self.hf_tile_size = 32 self.hf_num_tiles_x = 0 + # Point cloud state + self.point_colors = None # GPU buffer (cupy) for per-point RGBA colors + # Device buffers for CPU->GPU transfers (per-instance) self.d_rays = None self.d_rays_size = 0 @@ -197,6 +201,9 @@ def clear(self): self.hf_tile_size = 32 self.hf_num_tiles_x = 0 + # Clear point cloud state + self.point_colors = None + # Reset to single-GAS mode self.single_gas_mode = True @@ -535,6 +542,7 @@ def _init_optix(device: Optional[int] = None): usesPrimitiveTypeFlags=( optix.PRIMITIVE_TYPE_FLAGS_TRIANGLE | optix.PRIMITIVE_TYPE_FLAGS_CUSTOM + | optix.PRIMITIVE_TYPE_FLAGS_SPHERE | (optix.PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE_ROCAPS if _state.capabilities.get('has_rocaps_curves') else optix.PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE) @@ -622,13 +630,36 @@ def _init_optix(device: Optional[int] = None): ) _state.heightfield_hit_pg = _state.heightfield_hit_pg[0] + # Built-in IS module for spheres (point cloud rendering) + _sphere_is_options = optix.BuiltinISOptions( + builtinISModuleType=optix.PRIMITIVE_TYPE_SPHERE, + usesMotionBlur=False, + ) + _state.sphere_module = _state.context.builtinISModuleGet( + module_options, + pipeline_options, + _sphere_is_options, + ) + + # Hit group for spheres (built-in IS for intersection, dedicated CH for normals) + sphere_hit_desc = optix.ProgramGroupDesc() + sphere_hit_desc.hitgroupModuleCH = _state.module + sphere_hit_desc.hitgroupEntryFunctionNameCH = "__closesthit__sphere" + sphere_hit_desc.hitgroupModuleIS = _state.sphere_module + _state.sphere_hit_pg, log = _state.context.programGroupCreate( + [sphere_hit_desc], + pg_options, + ) + _state.sphere_hit_pg = _state.sphere_hit_pg[0] + # Create pipeline link_options = optix.PipelineLinkOptions( maxTraceDepth=1, ) program_groups = [_state.raygen_pg, _state.miss_pg, _state.hit_pg, - _state.curve_hit_pg, _state.heightfield_hit_pg] + _state.curve_hit_pg, _state.heightfield_hit_pg, + _state.sphere_hit_pg] _state.pipeline = _state.context.pipelineCreate( pipeline_options, link_options, @@ -658,8 +689,8 @@ def _init_optix(device: Optional[int] = None): # Create shader binding table _create_sbt() - # Allocate params buffer: 48 (existing) + 40 (heightfield fields) = 88 - _state.d_params = cupy.zeros(88, dtype=cupy.uint8) + # Allocate params buffer: 48 + 40 (heightfield) + 8 (point_colors) = 96 + _state.d_params = cupy.zeros(96, dtype=cupy.uint8) _state.initialized = True atexit.register(_cleanup_at_exit) @@ -683,7 +714,7 @@ def _create_sbt(): optix.sbtRecordPackHeader(_state.miss_pg, miss_record) d_miss = cupy.array(np.frombuffer(miss_record, dtype=np.uint8)) - # Pack hit group records: [0] = triangles, [1] = curves, [2] = heightfield + # Pack hit group records: [0] = triangles, [1] = curves, [2] = heightfield, [3] = spheres hit_record = bytearray(header_size) optix.sbtRecordPackHeader(_state.hit_pg, hit_record) @@ -693,8 +724,12 @@ def _create_sbt(): hf_hit_record = bytearray(header_size) optix.sbtRecordPackHeader(_state.heightfield_hit_pg, hf_hit_record) + sphere_hit_record = bytearray(header_size) + optix.sbtRecordPackHeader(_state.sphere_hit_pg, sphere_hit_record) + # Concatenate all hit records into a single buffer - hit_all = bytearray(hit_record) + bytearray(curve_hit_record) + bytearray(hf_hit_record) + hit_all = (bytearray(hit_record) + bytearray(curve_hit_record) + + bytearray(hf_hit_record) + bytearray(sphere_hit_record)) d_hit = cupy.array(np.frombuffer(hit_all, dtype=np.uint8)) _state.sbt = optix.ShaderBindingTable( @@ -704,7 +739,7 @@ def _create_sbt(): missRecordCount=1, hitgroupRecordBase=d_hit.data.ptr, hitgroupRecordStrideInBytes=header_size, - hitgroupRecordCount=3, + hitgroupRecordCount=4, ) # Keep references to prevent garbage collection @@ -1357,6 +1392,87 @@ def _build_gas_for_heightfield(elevation_data, H, W, spacing_x, spacing_y, ve, t return gas_handle, gas_buffer, d_elevation, num_tiles_x, num_tiles_y +def _build_gas_for_spheres(centers, radii, single_radius=False): + """ + Build a GAS for sphere primitives (point cloud rendering). + + Args: + centers: Sphere center positions (N*3 float32, flattened) + radii: Per-sphere radii (N float32) or single float if single_radius=True + single_radius: If True, radii contains a single value for all spheres + + Returns: + Tuple of (gas_handle, gas_buffer) or (0, None) on error + """ + global _state + + if not _state.initialized: + _init_optix() + + d_centers = cupy.asarray(centers, dtype=cupy.float32) + num_vertices = d_centers.size // 3 + + if num_vertices == 0: + return 0, None + + if single_radius: + d_radii = cupy.asarray([radii] if np.isscalar(radii) else radii, + dtype=cupy.float32) + else: + d_radii = cupy.asarray(radii, dtype=cupy.float32) + + build_input = optix.BuildInputSphereArray() + build_input.vertexBuffers = [d_centers.data.ptr] + build_input.numVertices = num_vertices + build_input.vertexStrideInBytes = 12 # 3 * sizeof(float) + build_input.radiusBuffers = [d_radii.data.ptr] + build_input.radiusStrideInBytes = 4 if not single_radius else 0 + build_input.singleRadius = 1 if single_radius else 0 + build_input.flags = [optix.GEOMETRY_FLAG_DISABLE_ANYHIT] + build_input.numSbtRecords = 1 + + accel_options = optix.AccelBuildOptions( + buildFlags=optix.BUILD_FLAG_ALLOW_COMPACTION, + operation=optix.BUILD_OPERATION_BUILD, + ) + + buffer_sizes = _state.context.accelComputeMemoryUsage( + [accel_options], + [build_input], + ) + + d_temp = cupy.zeros(buffer_sizes.tempSizeInBytes, dtype=cupy.uint8) + gas_buffer = cupy.zeros(buffer_sizes.outputSizeInBytes, dtype=cupy.uint8) + compacted_size_buffer = cupy.zeros(1, dtype=cupy.uint64) + + gas_handle = _state.context.accelBuild( + 0, # stream + [accel_options], + [build_input], + d_temp.data.ptr, + buffer_sizes.tempSizeInBytes, + gas_buffer.data.ptr, + buffer_sizes.outputSizeInBytes, + [optix.AccelEmitDesc(compacted_size_buffer.data.ptr, + optix.PROPERTY_TYPE_COMPACTED_SIZE)], + ) + + cupy.cuda.Stream.null.synchronize() + + compacted_size = int(compacted_size_buffer[0]) + if compacted_size < gas_buffer.nbytes: + compacted_buffer = cupy.zeros(compacted_size, dtype=cupy.uint8) + gas_handle = _state.context.accelCompact( + 0, + gas_handle, + compacted_buffer.data.ptr, + compacted_size, + ) + gas_buffer = compacted_buffer + + return gas_handle, gas_buffer + + def _build_ias(geom_state: _GeometryState): """ Build an Instance Acceleration Structure (IAS) from all GAS entries. @@ -1403,8 +1519,10 @@ def _build_ias(geom_state: _GeometryState): # Pack instanceId (4 bytes) struct.pack_into('I', instances_data, offset + 48, i) - # Pack sbtOffset (4 bytes) - 0 for triangles, 1 for curves, 2 for heightfield - if entry.is_heightfield: + # Pack sbtOffset (4 bytes) - 0=triangles, 1=curves, 2=heightfield, 3=spheres + if entry.is_sphere: + sbt_offset = 3 + elif entry.is_heightfield: sbt_offset = 2 elif entry.is_curve: sbt_offset = 1 @@ -1720,15 +1838,20 @@ def _trace_rays(geom_state: _GeometryState, rays, hits, num_rays: int, hf_tile = geom_state.hf_tile_size hf_ntx = geom_state.hf_num_tiles_x + # Point cloud colors pointer + pc_colors_ptr = 0 + if geom_state.point_colors is not None: + pc_colors_ptr = geom_state.point_colors.data.ptr + params_data = struct.pack( - 'QQQQQIIQiifffiii', + 'QQQQQIIQiifffiiIQ', trace_handle, # 8 d_rays.data.ptr, # 8 d_hits.data.ptr, # 8 d_prim_ids_ptr, # 8 d_inst_ids_ptr, # 8 ray_flags, # 4 - 0, # 4 padding (existing) + 0, # 4 padding hf_data_ptr, # 8 hf_w, # 4 hf_h, # 4 @@ -1737,7 +1860,8 @@ def _trace_rays(geom_state: _GeometryState, rays, hits, num_rays: int, hf_ve, # 4 hf_tile, # 4 hf_ntx, # 4 - 0, # 4 padding + 0, # 4 padding for alignment + pc_colors_ptr, # 8 ) _state.d_params[:] = cupy.frombuffer(np.frombuffer(params_data, dtype=np.uint8), dtype=cupy.uint8) @@ -1746,7 +1870,7 @@ def _trace_rays(geom_state: _GeometryState, rays, hits, num_rays: int, _state.pipeline, 0, # stream _state.d_params.data.ptr, - 88, # sizeof(Params) + 96, # sizeof(Params) _state.sbt, num_rays, # width 1, # height @@ -2206,6 +2330,92 @@ def add_heightfield_geometry(self, geometry_id: str, elevation, self._geom_state.ias_dirty = True return 0 + def add_sphere_geometry(self, geometry_id: str, centers, radii, + colors=None, + transform: Optional[List[float]] = None) -> int: + """ + Add sphere primitives to the scene (for point cloud rendering). + + Uses OptiX built-in sphere intersection for hardware-accelerated + ray-sphere tests. Each sphere is defined by a center point and radius. + + Args: + geometry_id: Unique identifier for this geometry. + centers: Sphere center positions (N*3 float32, flattened or Nx3). + radii: Per-sphere radii (N float32), or a single float for uniform radius. + colors: Optional per-point RGBA colors (N*4 float32). If provided, + stored on geometry state for use by the shade kernel. + transform: Optional 12-float 3x4 row-major affine transform. + + Returns: + 0 on success, non-zero on error. + """ + global _state + + if not _state.initialized: + _init_optix() + + # Switch to multi-GAS mode if currently in single-GAS mode + if self._geom_state.single_gas_mode: + self._geom_state.gas_handle = 0 + self._geom_state.gas_buffer = None + self._geom_state.current_hash = 0xFFFFFFFFFFFFFFFF + self._geom_state.single_gas_mode = False + + # Prepare centers + if isinstance(centers, cupy.ndarray): + centers_for_hash = centers.get() + else: + centers_for_hash = np.asarray(centers, dtype=np.float32) + vertices_hash = hash(centers_for_hash.tobytes()) + + existing = self._geom_state.gas_entries.get(geometry_id) + if existing is not None and existing.vertices_hash == vertices_hash: + if transform is not None: + existing.transform = list(transform) + self._geom_state.ias_dirty = True + return 0 + + # Determine if single radius + single_radius = np.isscalar(radii) + + gas_handle, gas_buffer = _build_gas_for_spheres( + centers, radii, single_radius=single_radius) + if gas_handle == 0: + return -1 + + if transform is None: + transform = [ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + ] + else: + transform = list(transform) + if len(transform) != 12: + return -1 + + num_vertices = len(centers_for_hash.ravel()) // 3 + + self._geom_state.gas_entries[geometry_id] = _GASEntry( + gas_id=geometry_id, + gas_handle=gas_handle, + gas_buffer=gas_buffer, + vertices_hash=vertices_hash, + transform=transform, + num_vertices=num_vertices, + num_triangles=0, + is_sphere=True, + ) + + # Store per-point colors if provided + if colors is not None: + self._geom_state.point_colors = cupy.asarray( + colors, dtype=cupy.float32) + + self._geom_state.ias_dirty = True + return 0 + def remove_geometry(self, geometry_id: str) -> int: """ Remove a geometry from the scene.