utils.py (forked from hpcaitech/OPT-Benchmark)
import torch
import torch.distributed as dist


def memory_cap(size_in_GB):
    """Cap this process's CUDA memory usage at size_in_GB using plain PyTorch."""
    print(f"use only {size_in_GB} GB of CUDA memory")
    # The rank is used as the device index, so torch.distributed must already
    # be initialized (e.g. by the launch script) before calling this helper.
    assert dist.is_initialized(), "memory_cap must be used after dist init"
    local_rank = dist.get_rank()
    cuda_capacity = torch.cuda.get_device_properties(local_rank).total_memory
    size_in_B = size_in_GB * 1024**3
    if size_in_B > cuda_capacity:
        # The requested cap exceeds the device's physical memory, so there is nothing to limit.
        print(f"memory_cap is useless since capacity {cuda_capacity / 1024**3} GB is less than {size_in_GB} GB")
        return
    fraction = size_in_B / cuda_capacity
    print(f"mem fraction is {fraction}")
    torch.cuda.set_per_process_memory_fraction(fraction, local_rank)


def colo_memory_cap(size_in_GB):
    """Cap GPU memory usage via ColossalAI's helpers instead of raw PyTorch."""
    from colossalai.utils import colo_set_process_memory_fraction, colo_device_memory_capacity
    from colossalai.utils import get_current_device

    cuda_capacity = colo_device_memory_capacity(get_current_device())
    if size_in_GB * (1024**3) < cuda_capacity:
        colo_set_process_memory_fraction(size_in_GB * (1024**3) / cuda_capacity)
        print(f"Using {size_in_GB} GB of GPU memory")


if __name__ == "__main__":
    memory_cap(40)
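
For reference, a minimal sketch of how memory_cap might be called from a benchmark entry point launched with torchrun (the script name, backend choice, and the 40 GB cap are illustrative assumptions; LOCAL_RANK is the environment variable torchrun sets for each process):

    import os

    import torch
    import torch.distributed as dist

    from utils import memory_cap

    if __name__ == "__main__":
        # torchrun sets LOCAL_RANK for each spawned process.
        local_rank = int(os.environ.get("LOCAL_RANK", 0))
        torch.cuda.set_device(local_rank)
        # Initialize the default process group first, since memory_cap
        # asserts dist.is_initialized().
        dist.init_process_group(backend="nccl")
        memory_cap(40)  # limit each process to roughly 40 GB of CUDA memory
        # ... build the model and run the benchmark here ...
        dist.destroy_process_group()

A launch command along the lines of `torchrun --nproc_per_node=8 launch_example.py` would then apply the cap on every rank before any large allocations happen.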