Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
data/*
output/*
__pycache__/
*.pyc
*.pyo
*.pyd
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python-envs.defaultEnvManager": "ms-python.python:conda",
"python-envs.defaultPackageManager": "ms-python.python:conda"
}
599 changes: 599 additions & 0 deletions PROJECT_SUMMARY.md

Large diffs are not rendered by default.

22 changes: 21 additions & 1 deletion arguments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def extract(self, args):
setattr(group, arg[0], arg[1])
return group

class ModelParams(ParamGroup):
class ModelParams(ParamGroup):
def __init__(self, parser, sentinel=False):
self.sh_degree = 3
self._source_path = ""
Expand All @@ -54,6 +54,10 @@ def __init__(self, parser, sentinel=False):
self._white_background = False
self.data_device = "cuda"
self.eval = False
# TD-FastGS 4D extension. When force_4dgs is True the scene loader always
# uses the 4DGS reader; otherwise it is auto-detected from the dataset layout.
self.force_4dgs = False
self.n_frames = -1 # number of temporal frames; -1 => infer from data
super().__init__(parser, "Loading Parameters", sentinel)

def extract(self, args):
Expand Down Expand Up @@ -101,6 +105,22 @@ def __init__(self, parser):

self.random_background = False
self.optimizer_type = "default"

# ----- TD-FastGS 4D (temporal) parameters -----
self.velocity_lr = 0.0016 # learning rate for per-Gaussian velocity v
self.sigma_t_lr = 0.002 # learning rate for sigma_t_raw (life radius, log space)
self.lambda_velocity = 0.01 # weight of the velocity-smoothness regularizer (lambda_v)
self.velocity_smooth_pairs = 4096 # number of point-pairs sampled for L_smooth
self.tau_alive = 0.005 # causal pruning threshold on alpha'(t)
self.tau_d_static = 5.0 # densification (VCD) threshold for static points
self.tau_d_dynamic = 2.5 # densification (VCD) threshold for dynamic points
self.tau_p = 0.9 # pruning (VCP) threshold
self.wt_densify_thresh = 0.2 # w_t active-window threshold used for densify/prune gating
self.wt_current_thresh = 0.5 # w_t "current frame" threshold for the gradient gate
self.static_only_until = 3000 # stage-1 boundary: sample only frame 0 before this
self.temporal_window_until = 10000 # stage-2 boundary: sliding-window sampling before this
self.temporal_window_size = 4 # sliding-window width (frames) in stage 2
self.lambda_scale_penalty = 0.0 # soft scale penalty weight for dynamic points (0 => off)
super().__init__(parser, "Optimization Parameters")

def get_combined_args(parser : ArgumentParser):
Expand Down
Binary file removed arguments/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file removed arguments/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
146 changes: 146 additions & 0 deletions export_frames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""
Export one Gaussian point cloud per frame from a trained 4DGS point_cloud.ply.

Two attribute naming schemes are supported:
    TD-FastGS (this project): sigma_t_raw, vel_x/vel_y/vel_z, is_static
    DT-4DGS (original):       t_sigma, velocity_0/1/2

For every frame time t the script:
    1. Shifts positions:           xyz_t = xyz + velocity * (t - t_mu)
    2. Computes a temporal weight: w = exp(- (t - t_mu)^2 / (2 * sigma^2 + 1e-5))
    3. Modulates opacity:          opacity_t = opacity * w
    4. Enforces causality:         keeps only points with t_mu <= t and opacity_t > threshold

Usage:
    python export_frames.py --ply <path_to_point_cloud.ply> --out <output_dir> --num_frames 80
    python export_frames.py --ply output/flower_1/point_cloud/iteration_30000/point_cloud.ply --out output/flower_1/frames --num_frames 80
"""

import argparse
import os
import numpy as np
from plyfile import PlyData, PlyElement


def load_4dgs_ply(path):
    """Read a 4DGS PLY file and extract the fields needed for per-frame export.

    Both the TD-FastGS naming (``sigma_t_raw``, ``vel_x``/``vel_y``/``vel_z``) and
    the DT-4DGS naming (``t_sigma``, ``velocity_0``/``1``/``2``) are accepted;
    the TD-FastGS names take precedence when both are present.

    Args:
        path: Path of the PLY file to read.

    Returns:
        A 7-tuple ``(plydata, xyz, opacity_raw, t_mu, t_sigma_raw, velocity,
        all_props)``: the parsed PLY object, the stacked ``x``/``y``/``z``
        columns, the stored ``opacity`` column, the ``t_mu`` column, the
        temporal-width column, the stacked velocity columns, and the list of
        vertex property names.

    Raises:
        ValueError: If the vertex element has neither temporal-width name, or
            neither velocity naming scheme.
    """
    plydata = PlyData.read(path)
    vertex = plydata["vertex"]
    all_props = [prop.name for prop in vertex.properties]

    def stack_columns(*names):
        # One row per vertex, one column per requested property.
        return np.stack([vertex[name] for name in names], axis=1)

    xyz = stack_columns("x", "y", "z")

    # Opacity is stored raw; export_frame applies the sigmoid itself.
    opacity_raw = np.asarray(vertex["opacity"])

    # Unlike the fields below, t_mu has a single name in both schemes.
    t_mu = np.asarray(vertex["t_mu"])

    # Temporal width: try the TD-FastGS name first, then the DT-4DGS one.
    for width_name in ("sigma_t_raw", "t_sigma"):
        if width_name in all_props:
            t_sigma_raw = np.asarray(vertex[width_name])
            break
    else:
        raise ValueError("PLY has neither 'sigma_t_raw' nor 't_sigma' — not a 4DGS file?")

    # Velocity: the presence of the first component selects the naming scheme.
    if "vel_x" in all_props:
        velocity_names = ("vel_x", "vel_y", "vel_z")
    elif "velocity_0" in all_props:
        velocity_names = ("velocity_0", "velocity_1", "velocity_2")
    else:
        raise ValueError("PLY has no velocity attributes — not a 4DGS file?")
    velocity = stack_columns(*velocity_names)

    return plydata, xyz, opacity_raw, t_mu, t_sigma_raw, velocity, all_props


def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The input is clipped to ``[-80, 80]`` first so the argument handed to
    ``np.exp`` stays bounded for extreme values.
    """
    bounded = np.clip(x, -80, 80)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator


def export_frame(plydata, xyz, opacity_raw, t_mu, t_sigma_raw, velocity, frame_t, threshold=0.005,
                 causal_eps=1e-6):
    """Evaluate every Gaussian at time ``frame_t`` and decide which ones survive.

    Args:
        plydata: Unused; kept so existing positional callers keep working.
        xyz: (N, 3) array of stored centers.
        opacity_raw: (N,) array of stored (pre-sigmoid) opacities.
        t_mu: (N,) array of per-Gaussian temporal centers.
        t_sigma_raw: (N,) array of temporal widths in log space (``np.exp`` is
            applied here).
        velocity: (N, 3) array of per-Gaussian velocities.
        frame_t: Scalar time of the frame being exported.
        threshold: A point survives only if its modulated opacity exceeds this.
        causal_eps: Tolerance added to ``frame_t`` in the causal test. The
            default matches the ``t_mu <= t + 1e-6`` mask used by ``render_4d``,
            so a Gaussian whose stored ``t_mu`` sits a rounding error above its
            birth-frame time is not dropped from that frame.

    Returns:
        ``(alive, xyz_t, opacity_t_raw)``: an (N,) boolean survival mask, the
        (N, 3) centers shifted to ``frame_t``, and the (N,) modulated opacity
        mapped back to raw (inverse-sigmoid) space. All three are full-size;
        the caller applies ``alive``.
    """
    dt = frame_t - t_mu                                                # (N,)
    xyz_t = xyz + velocity * dt[:, None]                               # (N, 3)

    sigma = np.exp(t_sigma_raw)                                        # (N,)
    # The 1e-5 keeps the denominator positive when sigma underflows to zero.
    temporal_weight = np.exp(-(dt ** 2) / (2.0 * sigma ** 2 + 1e-5))   # (N,)

    opacity_t = sigmoid(opacity_raw) * temporal_weight                 # (N,)

    # Causal pruning: a point cannot appear before its own t_mu. The tolerance
    # keeps this test consistent with the renderer's causal mask.
    alive = (t_mu <= frame_t + causal_eps) & (opacity_t > threshold)

    # Map the modulated opacity back to raw space (inverse sigmoid); clamp
    # first so the logit stays finite at 0 and 1.
    opacity_t_clamped = np.clip(opacity_t, 1e-7, 1.0 - 1e-7)
    opacity_t_raw = np.log(opacity_t_clamped / (1.0 - opacity_t_clamped))

    return alive, xyz_t, opacity_t_raw


def save_frame_ply(plydata, alive_mask, xyz_t, opacity_t_raw, output_path):
    """Write the surviving points of one frame as a standard 3DGS PLY.

    Every vertex property of the source PLY is copied for the rows selected by
    ``alive_mask``, except the 4DGS-only fields, which are dropped. The
    ``x``/``y``/``z`` columns are taken from ``xyz_t`` and ``opacity`` from
    ``opacity_t_raw`` instead of from the source.

    Args:
        plydata: Parsed source PLY; its ``"vertex"`` element is read.
        alive_mask: (N,) boolean mask of the points to keep.
        xyz_t: (N, 3) centers already shifted to the frame time.
        opacity_t_raw: (N,) modulated opacities in raw space.
        output_path: Destination file path.
    """
    vertex = plydata["vertex"]

    # 4DGS-specific fields under either naming convention; none are written out.
    temporal_fields = {
        "t_mu",
        "sigma_t_raw", "t_sigma",                     # TD-FastGS / DT-4DGS temporal width
        "vel_x", "vel_y", "vel_z",                    # TD-FastGS velocity
        "velocity_0", "velocity_1", "velocity_2",     # DT-4DGS velocity
        "is_static",                                  # TD-FastGS static flag
    }
    kept_fields = [prop.name for prop in vertex.properties if prop.name not in temporal_fields]

    # Reuse each kept field's dtype string from the source structured array.
    source_dtype = vertex.data.dtype
    out_dtype = [(field, source_dtype[field].str) for field in kept_fields]

    survivors = np.empty(int(alive_mask.sum()), dtype=out_dtype)

    axis_of = {"x": 0, "y": 1, "z": 2}
    for field in kept_fields:
        column = np.asarray(vertex[field])[alive_mask]
        if field in axis_of:
            # Positions come from the time-shifted centers, not the stored ones.
            column = xyz_t[alive_mask, axis_of[field]]
        elif field == "opacity":
            column = opacity_t_raw[alive_mask]
        survivors[field] = column

    vertex_element = PlyElement.describe(survivors, "vertex")
    PlyData([vertex_element]).write(output_path)


def main():
    """Command-line entry point: parse arguments and write one PLY per frame.

    Frame ``i`` (0-based) is evaluated at time ``i / max(num_frames - 1, 1)``
    and written to ``<out>/<i + 1>.ply``.
    """
    parser = argparse.ArgumentParser(description="Export per-frame PLY from a 4DGS checkpoint")
    argument_specs = (
        ("--ply", {"type": str, "required": True, "help": "Path to the trained point_cloud.ply"}),
        ("--out", {"type": str, "required": True, "help": "Output directory for per-frame PLY files"}),
        ("--num_frames", {"type": int, "default": 80, "help": "Number of frames to export"}),
        ("--threshold", {"type": float, "default": 0.005, "help": "Opacity threshold for pruning"}),
    )
    for flag, options in argument_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    os.makedirs(args.out, exist_ok=True)

    print(f"Loading PLY: {args.ply}")
    plydata, xyz, opacity_raw, t_mu, t_sigma_raw, velocity, _all_props = load_4dgs_ply(args.ply)
    print(f"Total gaussians: {xyz.shape[0]}")

    # Guard the divisor so a single-frame export evaluates at t = 0.
    last_index = max(args.num_frames - 1, 1)
    for frame_no in range(1, args.num_frames + 1):
        frame_t = (frame_no - 1) / last_index
        alive, xyz_t, opacity_t_raw = export_frame(
            plydata, xyz, opacity_raw, t_mu, t_sigma_raw, velocity, frame_t, args.threshold
        )
        n_alive = int(alive.sum())

        # Output files are numbered from 1.
        out_path = os.path.join(args.out, f"{frame_no}.ply")
        save_frame_ply(plydata, alive, xyz_t, opacity_t_raw, out_path)
        print(f"Frame {frame_no}/{args.num_frames} t={frame_t:.4f} alive={n_alive} -> {out_path}")

    print("Done.")


# Run the exporter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
140 changes: 139 additions & 1 deletion gaussian_renderer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,142 @@ def render_fastgs(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.T
"viewspace_points": screenspace_points,
"visibility_filter" : (radii > 0).nonzero(),
"radii": radii,
"accum_metric_counts" : accum_metric_counts}
"accum_metric_counts" : accum_metric_counts}


def render_4d(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, mult,
              scaling_modifier = 1.0, override_color = None, get_flag=None, metric_map = None):
    """TD-FastGS 4D render.

    Mandatory ordering:
    1. Spatio-temporal transform: translate centers to the current frame.
    2. Causal pruning: build the alive_mask sparse subset.
    3. Compact Box + rasterization on the alive subset only (FastGS CB runs
       inside the CUDA kernel, so subsetting the inputs is what guarantees CB
       is computed only over alive Gaussians).
    4. Back-fill per-Gaussian outputs (radii, metric counts) to full size so the
       FastGS VCD/VCP statistics keep operating on full-size tensors.

    The full-size `screenspace_points` is indexed to form the subset means2D; the
    rasterizer backward therefore scatters the screen-space gradient back into the
    full-size tensor, so add_densification_stats works exactly as in the 3D path.

    Args:
        viewpoint_camera: Camera object; this function reads its `timestamp`,
            `image_height`, `image_width`, `FoVx`, `FoVy`, `world_view_transform`,
            `full_proj_transform` and `camera_center`.
        pc: Gaussian model; must provide `compute_temporal_weight`, `_t_mu`,
            `_velocity`, `tau_alive` and the usual `get_*` accessors used below.
        pipe: Pipeline flags; `debug`, `compute_cov3D_python` and
            `convert_SHs_python` are read.
        bg_color: Background color passed to the rasterizer. In the
            nothing-alive path it is viewed as (3, 1, 1), so it must hold
            exactly three values.
        mult, get_flag: Forwarded unchanged to GaussianRasterizationSettings.
        scaling_modifier: Forwarded as `scale_modifier` and to `get_covariance`.
        override_color: Optional full-size per-Gaussian colors; when given, SH
            evaluation is skipped and the alive rows are used as precomputed colors.
        metric_map: Forwarded to the rasterizer settings; when None, an int
            zero tensor with image_height * image_width elements is created.

    Returns:
        dict with keys "render", "viewspace_points", "visibility_filter",
        "radii", "accum_metric_counts", "w_t" and "alive_mask". "radii" and
        "accum_metric_counts" are full-size (N,) tensors that are zero for
        Gaussians outside the alive subset.
    """
    t = float(viewpoint_camera.timestamp)

    # --- Step 1: spatio-temporal transform (kept in graph; w_t feeds sigma_t_raw) ---
    w_t = pc.compute_temporal_weight(t)                                  # (N,)
    dt = t - pc._t_mu                                                    # (N,)
    xyz_transformed = pc.get_xyz + pc._velocity * dt.unsqueeze(-1)       # (N, 3)
    opacity_eff = pc.get_opacity.squeeze(-1) * w_t                       # (N,)

    N = pc.get_xyz.shape[0]

    # --- Step 2: causal pruning (boolean mask; no grad needed for the mask) ---
    with torch.no_grad():
        # The 1e-6 slack keeps Gaussians whose t_mu equals t up to rounding.
        causal_mask = pc._t_mu <= (t + 1e-6)                             # (N,) bool
        alive_mask = causal_mask & (opacity_eff > pc.tau_alive)
        alive_idx = alive_mask.nonzero(as_tuple=False).squeeze(-1)

    # Full-size screen-space tensor; subset rows receive grad via index backward.
    # The `+ 0` makes this the output of an op rather than a leaf tensor, which
    # is why retain_grad() is requested right after.
    screenspace_points = torch.zeros((N, 4), dtype=pc.get_xyz.dtype,
                                     requires_grad=True, device="cuda") + 0
    try:
        screenspace_points.retain_grad()
    except Exception:
        # Best-effort: rendering proceeds even if the gradient cannot be retained.
        pass

    # Full-size outputs, zero for every Gaussian the rasterizer never sees.
    radii_full = torch.zeros(N, dtype=torch.int, device="cuda")
    accum_full = torch.zeros(N, dtype=torch.int, device="cuda")

    if alive_idx.numel() == 0:
        # Nothing alive at this time: return a background image and empty stats.
        # NOTE(review): this image is built from bg_color alone, so it has no
        # dependence on the Gaussian parameters — confirm callers that run
        # backward on the result tolerate that.
        H, W = int(viewpoint_camera.image_height), int(viewpoint_camera.image_width)
        rendered_image = bg_color.view(3, 1, 1).expand(3, H, W).contiguous()
        return {"render": rendered_image,
                "viewspace_points": screenspace_points,
                "visibility_filter": (radii_full > 0).nonzero(),
                "radii": radii_full,
                "accum_metric_counts": accum_full,
                "w_t": w_t,
                "alive_mask": alive_mask}

    # Rasterization configuration.
    tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
    tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)

    if metric_map is None:
        # Default: one int entry per pixel, all zero.
        metric_map = torch.zeros(int(viewpoint_camera.image_height) * int(viewpoint_camera.image_width),
                                 dtype=torch.int, device='cuda')

    raster_settings = GaussianRasterizationSettings(
        image_height=int(viewpoint_camera.image_height),
        image_width=int(viewpoint_camera.image_width),
        tanfovx=tanfovx,
        tanfovy=tanfovy,
        bg=bg_color,
        scale_modifier=scaling_modifier,
        viewmatrix=viewpoint_camera.world_view_transform,
        projmatrix=viewpoint_camera.full_proj_transform,
        sh_degree=pc.active_sh_degree,
        campos=viewpoint_camera.camera_center,
        mult=mult,
        prefiltered=False,
        debug=pipe.debug,
        get_flag=get_flag,
        metric_map=metric_map
    )
    rasterizer = GaussianRasterizer(raster_settings=raster_settings)

    # --- Step 3: extract the alive subset of every per-Gaussian input ---
    means3D = xyz_transformed[alive_idx]
    means2D = screenspace_points[alive_idx]   # grad scatters back to full size
    opacity = opacity_eff[alive_idx].unsqueeze(-1)

    # Either a precomputed 3D covariance or scale + rotation is handed to the
    # rasterizer, never both.
    scales = None
    rotations = None
    cov3D_precomp = None
    if pipe.compute_cov3D_python:
        cov3D_precomp = pc.get_covariance(scaling_modifier)[alive_idx]
    else:
        scales = pc.get_scaling[alive_idx]
        rotations = pc.get_rotation[alive_idx]

    # Color: SH coefficients (dc + rest) for the rasterizer, or precomputed RGB
    # from Python-side SH evaluation or from override_color.
    dc = None
    shs = None
    colors_precomp = None
    if override_color is None:
        if pipe.convert_SHs_python:
            shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree + 1) ** 2)[alive_idx]
            # View directions use the time-shifted centers, not the stored ones.
            dir_pp = (xyz_transformed[alive_idx] - viewpoint_camera.camera_center.repeat(alive_idx.shape[0], 1))
            dir_pp_normalized = dir_pp / dir_pp.norm(dim=1, keepdim=True)
            sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
            colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
        else:
            dc = pc.get_features_dc[alive_idx]
            shs = pc.get_features_rest[alive_idx]
    else:
        colors_precomp = override_color[alive_idx]

    rendered_image, radii_sparse, accum_sparse = rasterizer(
        means3D=means3D,
        means2D=means2D,
        dc=dc,
        shs=shs,
        colors_precomp=colors_precomp,
        opacities=opacity,
        scales=scales,
        rotations=rotations,
        cov3D_precomp=cov3D_precomp)

    # --- Step 4: back-fill per-Gaussian outputs to full size ---
    radii_full[alive_idx] = radii_sparse
    # Only scatter the metric counts when there is exactly one per alive
    # Gaussian; otherwise accum_full stays all-zero.
    if accum_sparse is not None and accum_sparse.numel() == alive_idx.numel():
        accum_full[alive_idx] = accum_sparse.to(accum_full.dtype)

    return {"render": rendered_image,
            "viewspace_points": screenspace_points,
            "visibility_filter": (radii_full > 0).nonzero(),
            "radii": radii_full,
            "accum_metric_counts": accum_full,
            "w_t": w_t,
            "alive_mask": alive_mask}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed lpipsPyTorch/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file removed lpipsPyTorch/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions memory/MEMORY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Memory Index

- [flower300 data format](flower300-data-format.md) — multi-view-video 4D dataset layout (36 cams x 300 frames, pcd<N>.ply), differs from prompt.md spec
16 changes: 16 additions & 0 deletions memory/flower300-data-format.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
name: flower300-data-format
description: Layout of the user's flower300 multi-view-video 4D dataset
metadata:
type: project
---
The user's TD-FastGS data (`flower300/`, also the target real-data format) is a multi-view video, NOT the `points3D.ply`/`frame_*.ply` layout assumed by the original prompt.md spec:

- `sparse/0/{cameras,images,points3D}.txt` — COLMAP calibration for 36 fixed cameras (PINHOLE, 3839x2159). `images.txt` names them `1.png`..`36.png` (these are CAMERA ids, not frames). `points3D.txt` is EMPTY — no COLMAP point cloud; init comes only from the PLYs below.
- `images/<frame>/images/<cam>.png` — frames 1..300, each folder has 36 cam PNGs (some frames also have redundant `.jpg` duplicates that must be ignored). Training cameras = cross product 36 cams x 300 frames = 10,800 images.
- `static_points/pcd1.ply` — ~17k static background pts (t_mu=0).
- `dynamic_points/pcd<frame>.ply` — frames 1..300, ~2300 pts each, born at that frame's time.

Timestamp normalization MUST be identical for cameras and dynamic PLYs: `t = (frame_id - fmin)/(fmax - fmin)`, with fmin=1 and fmax=300 for this dataset. Static points use t=0.

10,800 full-res images cannot be eagerly loaded to GPU — Camera needs lazy disk loading with a bounded LRU CPU cache. See [[fast4dgs-td-implementation]].
Loading