@@ -406,9 +406,11 @@ renderCUDA(
406406 const float2 * __restrict__ points_xy_image,
407407 const float4 * __restrict__ conic_opacity,
408408 const float * __restrict__ colors,
409+ const float * __restrict__ depths,
409410 const float * __restrict__ final_Ts,
410411 const uint32_t * __restrict__ n_contrib,
411412 const float * __restrict__ dL_dpixels,
413+ const float * __restrict__ dL_depths,
412414 float3 * __restrict__ dL_dmean2D,
413415 float4 * __restrict__ dL_dconic2D,
414416 float * __restrict__ dL_dopacity,
@@ -435,6 +437,7 @@ renderCUDA(
435437 __shared__ float2 collected_xy[BLOCK_SIZE];
436438 __shared__ float4 collected_conic_opacity[BLOCK_SIZE];
437439 __shared__ float collected_colors[C * BLOCK_SIZE];
440+ __shared__ float collected_depths[BLOCK_SIZE];
438441
439442 // In the forward, we stored the final value for T, the
440443 // product of all (1 - alpha) factors.
@@ -448,12 +451,16 @@ renderCUDA(
448451
449452 float accum_rec[C] = { 0 };
450453 float dL_dpixel[C];
454+ float dL_depth;
455+ float accum_depth_rec = 0 ;
451456 if (inside)
452457 for (int i = 0 ; i < C; i++)
453458 dL_dpixel[i] = dL_dpixels[i * H * W + pix_id];
459+ dL_depth = dL_depths[pix_id];
454460
455461 float last_alpha = 0 ;
456462 float last_color[C] = { 0 };
463+ float last_depth = 0 ;
457464
458465 // Gradient of pixel coordinate w.r.t. normalized
459466 // screen-space viewport coordinates (-1 to 1)
@@ -475,6 +482,7 @@ renderCUDA(
475482 collected_conic_opacity[block.thread_rank ()] = conic_opacity[coll_id];
476483 for (int i = 0 ; i < C; i++)
477484 collected_colors[i * BLOCK_SIZE + block.thread_rank ()] = colors[coll_id * C + i];
485+ collected_depths[block.thread_rank ()] = depths[coll_id];
478486 }
479487 block.sync ();
480488
@@ -522,6 +530,17 @@ renderCUDA(
522530 // many that were affected by this Gaussian.
523531 atomicAdd (&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_dchannel);
524532 }
533+
534+ // Propagate gradients to per-Gaussian depths
535+ const float c_d = collected_depths[j];
536+ accum_depth_rec = last_alpha * last_depth + (1 .f - last_alpha) * accum_depth_rec;
537+ last_depth = c_d;
538+ dL_dalpha += (c_d - accum_depth_rec) * dL_depth;
539+ // for (int ch = 0; ch < C; ch++)
540+ // {
541+ // atomicAdd(&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_depth);
542+ // }
543+
525544 dL_dalpha *= T;
526545 // Update last alpha (to be used in the next iteration)
527546 last_alpha = alpha;
@@ -630,9 +649,11 @@ void BACKWARD::render(
630649 const float2 * means2D,
631650 const float4 * conic_opacity,
632651 const float * colors,
652+ const float * depths,
633653 const float * final_Ts,
634654 const uint32_t * n_contrib,
635655 const float * dL_dpixels,
656+ const float * dL_depths,
636657 float3 * dL_dmean2D,
637658 float4 * dL_dconic2D,
638659 float * dL_dopacity,
@@ -646,9 +667,11 @@ void BACKWARD::render(
646667 means2D,
647668 conic_opacity,
648669 colors,
670+ depths,
649671 final_Ts,
650672 n_contrib,
651673 dL_dpixels,
674+ dL_depths,
652675 dL_dmean2D,
653676 dL_dconic2D,
654677 dL_dopacity,
0 commit comments