-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark_host_device.py
More file actions
50 lines (39 loc) · 1.6 KB
/
benchmark_host_device.py
File metadata and controls
50 lines (39 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import torch
import gdel3d
import time
# gdel3d has two bindings:
# - triangulate_host: uses the original CPU code and expects CPU tensors, so the
#   binding must first copy a GPU tensor to the CPU (and copy results back).
# - triangulate_device: leaves the data on the GPU, avoiding repeated transfers
#   back and forth to the CPU.
def benchmark():
    """Benchmark gdel3d's host-side vs. device-side triangulation bindings.

    Generates one random GPU point cloud, times ``triangulate_host`` (which
    round-trips the data through the CPU) against ``triangulate_device``
    (which keeps the data on the GPU), and prints both timings, the speedup,
    and the difference in output tetrahedron counts.
    """
    N = 700_000
    points_cuda = torch.rand((N, 3), dtype=torch.float32, device='cuda')
    torch.cuda.synchronize()

    # Warm up the host binding on a small slice so one-time kernel-launch/JIT
    # costs are excluded from the measurement.
    # NOTE(review): the original warmed up with gdel3d.triangulate while timing
    # triangulate_host; warming up the function actually being timed keeps the
    # two measurement paths consistent.
    _ = gdel3d.triangulate_host(points_cuda[:1000])
    # Ensure the warmup has fully finished before recording the start event
    # (the device path below already did this; the host path did not).
    torch.cuda.synchronize()

    # CUDA events give stream-ordered GPU timing in milliseconds.
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)

    # Measure triangulate_host given GPU tensors (includes host<->device copies).
    start_event.record()
    tets_final_host = gdel3d.triangulate_host(points_cuda)
    end_event.record()
    torch.cuda.synchronize()
    total_host = start_event.elapsed_time(end_event)

    # Warm up and then measure the all-on-GPU binding.
    _ = gdel3d.triangulate_device(points_cuda[:1000])
    torch.cuda.synchronize()
    start_event.record()
    tets_final_device = gdel3d.triangulate_device(points_cuda)
    end_event.record()
    torch.cuda.synchronize()
    total_device = start_event.elapsed_time(end_event)

    speedup = total_host / total_device
    print(f"Host Total time: {total_host:.2f} ms", flush=True)
    print(f"Device Total time: {total_device:.2f} ms", flush=True)
    print(f"Speedup: {speedup:.2f}x", flush=True)

    # Counts may differ slightly between the two paths because the GPU
    # algorithm is non-deterministic; report the absolute difference only.
    diff = abs(tets_final_host.shape[0] - tets_final_device.shape[0])
    print(f"\nTetrahedron count difference: {diff} (Due to GPU non-determinism)", flush=True)


if __name__ == "__main__":
    benchmark()