From a0ae635b930535129ec2d2e04bc671ce3febb81d Mon Sep 17 00:00:00 2001 From: Jaime Fraustro Date: Fri, 21 Feb 2025 11:42:11 -0600 Subject: [PATCH 1/6] Add support for Intel GPU to MNIST examples * Add support for Intel GPU to MNIST example * Add support for Intel GPU to MNIST Forward-Forward example * Add support for Intel GPU to MNIST using RNN example and update README with optional arguments * Refactor argument parsing in MNIST examples. There is no need to use `default=False` with `store_true` Signed-off-by: jafraustro --- mnist/main.py | 13 +++++++++---- mnist_forward_forward/README.md | 1 + mnist_forward_forward/main.py | 12 ++++++++---- mnist_rnn/README.md | 17 +++++++++++++++++ mnist_rnn/main.py | 10 +++++++--- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/mnist/main.py b/mnist/main.py index 184dc4744f..09487639d4 100644 --- a/mnist/main.py +++ b/mnist/main.py @@ -82,21 +82,24 @@ def main(): help='learning rate (default: 1.0)') parser.add_argument('--gamma', type=float, default=0.7, metavar='M', help='Learning rate step gamma (default: 0.7)') - parser.add_argument('--no-cuda', action='store_true', default=False, + parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') - parser.add_argument('--no-mps', action='store_true', default=False, + parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--no-xpu', action='store_true', + help='disables Intel GPU training') + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') - parser.add_argument('--save-model', action='store_true', default=False, + 
parser.add_argument('--save-model', action='store_true', help='For Saving the current Model') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() torch.manual_seed(args.seed) @@ -104,6 +107,8 @@ def main(): device = torch.device("cuda") elif use_mps: device = torch.device("mps") + elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") diff --git a/mnist_forward_forward/README.md b/mnist_forward_forward/README.md index f6ae12e56d..8857c9a6fb 100644 --- a/mnist_forward_forward/README.md +++ b/mnist_forward_forward/README.md @@ -18,6 +18,7 @@ optional arguments: --lr LR learning rate (default: 0.03) --no_cuda disables CUDA training --no_mps disables MPS training + --no_xpu disables XPU training --seed SEED random seed (default: 1) --save_model For saving the current Model --train_size TRAIN_SIZE diff --git a/mnist_forward_forward/main.py b/mnist_forward_forward/main.py index a175126067..e6c2902ed8 100644 --- a/mnist_forward_forward/main.py +++ b/mnist_forward_forward/main.py @@ -102,10 +102,13 @@ def train(self, x_pos, x_neg): help="learning rate (default: 0.03)", ) parser.add_argument( - "--no_cuda", action="store_true", default=False, help="disables CUDA training" + "--no_cuda", action="store_true", help="disables CUDA training" ) parser.add_argument( - "--no_mps", action="store_true", default=False, help="disables MPS training" + "--no_mps", action="store_true", help="disables MPS training" + ) + parser.add_argument( + "--no_xpu", action="store_true", help="disables XPU training" ) parser.add_argument( "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)" @@ -113,7 +116,6 @@ def train(self, x_pos, x_neg): parser.add_argument( "--save_model", action="store_true", - default=False, help="For saving the current Model", ) parser.add_argument( @@ -126,7 +128,6 @@ def 
train(self, x_pos, x_neg): parser.add_argument( "--save-model", action="store_true", - default=False, help="For Saving the current Model", ) parser.add_argument( @@ -139,10 +140,13 @@ def train(self, x_pos, x_neg): args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() if use_cuda: device = torch.device("cuda") elif use_mps: device = torch.device("mps") + elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") diff --git a/mnist_rnn/README.md b/mnist_rnn/README.md index c879cb367f..ba63513711 100644 --- a/mnist_rnn/README.md +++ b/mnist_rnn/README.md @@ -8,3 +8,20 @@ pip install -r requirements.txt python main.py # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 ``` + +```bash +optional arguments: + -h, --help show this help message and exit + --batch_size input batch_size for training (default:64) + --testing_batch_size input batch size for testing (default: 1000) + --epochs EPOCHS number of epochs to train (default: 14) + --lr LR learning rate (default: 0.1) + --gamma learning rate step gamma (default: 0.7) + --cuda enables CUDA training + --xpu enables XPU training + --mps enables macos GPU training + --seed SEED random seed (default: 1) + --save_model For saving the current Model + --log_interval how many batches to wait before logging training status + --dry-run quickly check a single pass +``` \ No newline at end of file diff --git a/mnist_rnn/main.py b/mnist_rnn/main.py index 2fa64c00d6..f6c1ff3d48 100644 --- a/mnist_rnn/main.py +++ b/mnist_rnn/main.py @@ -93,15 +93,17 @@ def main(): help='learning rate step gamma (default: 0.7)') parser.add_argument('--cuda', action='store_true', default=False, help='enables CUDA training') - parser.add_argument('--mps', action="store_true", default=False, + parser.add_argument('--mps', action="store_true", help="enables MPS 
training") - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--xpu', action='store_true', + help='enables XPU training') + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') - parser.add_argument('--save-model', action='store_true', default=False, + parser.add_argument('--save-model', action='store_true', help='for Saving the current Model') args = parser.parse_args() @@ -109,6 +111,8 @@ def main(): device = "cuda" elif args.mps and not args.cuda: device = "mps" + elif args.xpu: + device = "xpu" else: device = "cpu" From 4a2e3e30abd3dc91a76585d2ca47b84a8824b6b7 Mon Sep 17 00:00:00 2001 From: eromomon Date: Fri, 21 Feb 2025 11:44:24 -0600 Subject: [PATCH 2/6] Add support for Intel GPU to Basic VAE example * Add support for Intel GPU to Basic VAE example and update README with optional arguments * Remove `default=False` from `store_true` arguments * Fix typo in Readme --- vae/README.md | 7 ++++--- vae/main.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/vae/README.md b/vae/README.md index cda6a33672..e2a432fd1e 100644 --- a/vae/README.md +++ b/vae/README.md @@ -14,8 +14,9 @@ The main.py script accepts the following arguments: optional arguments: --batch-size input batch size for training (default: 128) --epochs number of epochs to train (default: 10) - --no-cuda enables CUDA training - --mps enables GPU on macOS + --no-cuda disables CUDA training + --no-mps disables GPU on macOS + --no-xpu disables XPU training in Intel GPUs --seed random seed (default: 1) --log-interval how many batches to wait before logging training status -``` \ No newline at end of file +``` diff --git a/vae/main.py b/vae/main.py index 
d69833fbe0..f7915b9ced 100644 --- a/vae/main.py +++ b/vae/main.py @@ -13,10 +13,12 @@ help='input batch size for training (default: 128)') parser.add_argument('--epochs', type=int, default=10, metavar='N', help='number of epochs to train (default: 10)') -parser.add_argument('--no-cuda', action='store_true', default=False, +parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') -parser.add_argument('--no-mps', action='store_true', default=False, +parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') +parser.add_argument('--no-xpu', action='store_true', + help='disables Intel XPU training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', @@ -24,6 +26,7 @@ args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() +use_xpu = not args.no_xpu and torch.xpu.is_available() torch.manual_seed(args.seed) @@ -31,9 +34,13 @@ device = torch.device("cuda") elif use_mps: device = torch.device("mps") +elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") +print('Device to use: ', device) + kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} train_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=True, download=True, From 82129918d78d1b593f11ce66f827122734c77b71 Mon Sep 17 00:00:00 2001 From: eromomon Date: Fri, 21 Feb 2025 11:47:29 -0600 Subject: [PATCH 3/6] Add support for Intel GPU to Siamese Network example --- siamese_network/README.md | 37 ++++++++++++++++++++++++++++++++++++- siamese_network/main.py | 15 +++++++++++---- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index 973a0414a4..19b19f0e76 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -1,7 
+1,42 @@ # Siamese Network Example +Siamese network for image similarity estimation. +The network is composed of two identical networks, one for each input. +The output of each network is concatenated and passed to a linear layer. +The output of the linear layer passed through a sigmoid function. +[FaceNet](https://arxiv.org/pdf/1503.03832.pdf) is a variant of the Siamese network. +This implementation varies from FaceNet as we use the `ResNet-18` model from +[Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) as our feature extractor. +In addition, we aren't using `TripletLoss` as the MNIST dataset is simple, so `BCELoss` can do the trick. + ```bash pip install -r requirements.txt python main.py -# CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 ``` + +Optionally, you can add the following arguments to customize your execution. + +```bash +--batch-size input batch size for training (default: 64) +--test-batch-size input batch size for testing (default: 1000) +--epochs number of epochs to train (default: 14) +--lr learning rate (default: 1.0) +--gamma learning rate step gamma (default: 0.7) +--no-cuda disables CUDA training +--no-xpu disables XPU training +--no-mps disables macOS GPU training +--dry-run quickly check a single pass +--seed random seed (default: 1) +--log-interval how many batches to wait before logging training status +--save-model Saving the current Model +``` + +If a GPU device (CUDA, XPU, or MPS) is detected, the example will be executed on the GPU by default; otherwise, it will run on the CPU. + +To disable the GPU option, add the appropriate argument to the command. For example: + +```bash +python main.py --no-xpu +``` + +This command will execute the example on the CPU even if your system successfully detects an XPU. 
diff --git a/siamese_network/main.py b/siamese_network/main.py index 8f420a9b01..6bd55235e2 100644 --- a/siamese_network/main.py +++ b/siamese_network/main.py @@ -247,32 +247,39 @@ def main(): help='learning rate (default: 1.0)') parser.add_argument('--gamma', type=float, default=0.7, metavar='M', help='Learning rate step gamma (default: 0.7)') - parser.add_argument('--no-cuda', action='store_true', default=False, + parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') - parser.add_argument('--no-mps', action='store_true', default=False, + parser.add_argument('--no-xpu', action='store_true', + help='disables XPU training') + parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') - parser.add_argument('--save-model', action='store_true', default=False, + parser.add_argument('--save-model', action='store_true', help='For Saving the current Model') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() torch.manual_seed(args.seed) if use_cuda: device = torch.device("cuda") + elif use_xpu: + device = torch.device("xpu") elif use_mps: device = torch.device("mps") else: device = torch.device("cpu") + print('Device to use: ', device) + train_kwargs = {'batch_size': args.batch_size} test_kwargs = {'batch_size': args.test_batch_size} if use_cuda: From dcaff04e5138f6d1238d0c08bf0ec720a6447b2d Mon Sep 17 00:00:00 2001 From: eromomon Date: Fri, 21 Feb 2025 17:03:23 -0600 
Subject: [PATCH 4/6] Add support for Intel GPU to Fast Neural Style example --- fast_neural_style/README.md | 10 ++++++---- .../neural_style/neural_style.py | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fast_neural_style/README.md b/fast_neural_style/README.md index 8057847214..c7fbe80320 100644 --- a/fast_neural_style/README.md +++ b/fast_neural_style/README.md @@ -26,8 +26,9 @@ python neural_style/neural_style.py eval --content-image Date: Mon, 3 Mar 2025 19:02:49 -0600 Subject: [PATCH 5/6] Add support for Intel GPU to GAT example Signed-off-by: jafraustro --- gat/README.md | 1 + gat/main.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/gat/README.md b/gat/README.md index 7bb71bc17b..d7ae967379 100644 --- a/gat/README.md +++ b/gat/README.md @@ -89,6 +89,7 @@ options: epochs to wait for print training and validation evaluation (default: 20) --no-cuda disables CUDA training --no-mps disables macOS GPU training + --no-xpu disables XPU training --dry-run quickly check a single pass --seed S random seed (default: 13) ``` diff --git a/gat/main.py b/gat/main.py index 9c143af8ec..cba703de5c 100644 --- a/gat/main.py +++ b/gat/main.py @@ -303,15 +303,17 @@ def test(model, criterion, input, target, mask): help='dimension of the hidden representation (default: 64)') parser.add_argument('--num-heads', type=int, default=8, help='number of the attention heads (default: 4)') - parser.add_argument('--concat-heads', action='store_true', default=False, + parser.add_argument('--concat-heads', action='store_true', help='wether to concatinate attention heads, or average over them (default: False)') parser.add_argument('--val-every', type=int, default=20, help='epochs to wait for print training and validation evaluation (default: 20)') - parser.add_argument('--no-cuda', action='store_true', default=False, + parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') - 
parser.add_argument('--no-mps', action='store_true', default=False, + parser.add_argument('--no-xpu', action='store_true', + help='disables XPU training') + parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=13, metavar='S', help='random seed (default: 13)') @@ -320,12 +322,15 @@ def test(model, criterion, input, target, mask): torch.manual_seed(args.seed) use_cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() # Set the device to run on if use_cuda: device = torch.device('cuda') elif use_mps: device = torch.device('mps') + elif use_xpu: + device = torch.device('xpu') else: device = torch.device('cpu') print(f'Using {device} device') From 27a4fd99995a46ae43f0cbca44fa5fb9f833b1f2 Mon Sep 17 00:00:00 2001 From: eromomon Date: Mon, 19 May 2025 16:27:21 -0700 Subject: [PATCH 6/6] Add Accelerator Api to Imagenet Example Signed-off-by: eromomon --- imagenet/README.md | 7 ++- imagenet/main.py | 111 ++++++++++++++++++++++---------------- imagenet/requirements.txt | 4 +- 3 files changed, 71 insertions(+), 51 deletions(-) diff --git a/imagenet/README.md b/imagenet/README.md index 9b280f087e..e3f66429b9 100644 --- a/imagenet/README.md +++ b/imagenet/README.md @@ -33,7 +33,9 @@ python main.py -a resnet18 --dummy ## Multi-processing Distributed Data Parallel Training -You should always use the NCCL backend for multi-processing distributed training since it currently provides the best distributed training performance. +If running on CUDA, you should always use the NCCL backend for multi-processing distributed training since it currently provides the best distributed training performance. 
+ +For XPU multiprocessing is not supported as of PyTorch 2.6. ### Single node, multiple GPUs: @@ -59,7 +61,7 @@ python main.py -a resnet50 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backen ```bash usage: main.py [-h] [-a ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N] [--lr LR] [--momentum M] [--wd W] [-p N] [--resume PATH] [-e] [--pretrained] [--world-size WORLD_SIZE] [--rank RANK] - [--dist-url DIST_URL] [--dist-backend DIST_BACKEND] [--seed SEED] [--gpu GPU] [--multiprocessing-distributed] [--dummy] + [--dist-url DIST_URL] [--dist-backend DIST_BACKEND] [--seed SEED] [--gpu GPU] [--no-accel][--multiprocessing-distributed] [--dummy] [DIR] PyTorch ImageNet Training @@ -96,6 +98,7 @@ optional arguments: distributed backend --seed SEED seed for initializing training. --gpu GPU GPU id to use. + --no-accel disables accelerator --multiprocessing-distributed Use multi-processing distributed training to launch N processes per node, which has N GPUs. This is the fastest way to use PyTorch for either single node or multi node data parallel training diff --git a/imagenet/main.py b/imagenet/main.py index cc32d50733..dd33470908 100644 --- a/imagenet/main.py +++ b/imagenet/main.py @@ -71,6 +71,8 @@ help='seed for initializing training. ') parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') +parser.add_argument('--no-accel', action='store_true', + help='disables accelerator') parser.add_argument('--multiprocessing-distributed', action='store_true', help='Use multi-processing distributed training to launch ' 'N processes per node, which has N GPUs. 
This is the ' @@ -104,8 +106,17 @@ def main(): args.distributed = args.world_size > 1 or args.multiprocessing_distributed - if torch.cuda.is_available(): - ngpus_per_node = torch.cuda.device_count() + use_accel = not args.no_accel and torch.accelerator.is_available() + + if use_accel: + device = torch.accelerator.current_accelerator() + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + if device.type =='cuda': + ngpus_per_node = torch.accelerator.device_count() if ngpus_per_node == 1 and args.dist_backend == "nccl": warnings.warn("nccl backend >=2.5 requires GPU count>1, see https://github.com/NVIDIA/nccl/issues/103 perhaps use 'gloo'") else: @@ -127,8 +138,15 @@ def main_worker(gpu, ngpus_per_node, args): global best_acc1 args.gpu = gpu - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) + use_accel = not args.no_accel and torch.accelerator.is_available() + + if use_accel: + if args.gpu is not None: + torch.accelerator.set_device_index(args.gpu) + print("Use GPU: {} for training".format(args.gpu)) + device = torch.accelerator.current_accelerator() + else: + device = torch.device("cpu") if args.distributed: if args.dist_url == "env://" and args.rank == -1: @@ -147,16 +165,16 @@ def main_worker(gpu, ngpus_per_node, args): print("=> creating model '{}'".format(args.arch)) model = models.__dict__[args.arch]() - if not torch.cuda.is_available() and not torch.backends.mps.is_available(): + if not use_accel: print('using CPU, this will be slow') elif args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, # DistributedDataParallel will use all available devices. 
- if torch.cuda.is_available(): + if device.type == 'cuda': if args.gpu is not None: torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) + model.cuda(device) # When using a single GPU per process and per # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs of the current node. @@ -168,29 +186,17 @@ def main_worker(gpu, ngpus_per_node, args): # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None and torch.cuda.is_available(): - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - elif torch.backends.mps.is_available(): - device = torch.device("mps") - model = model.to(device) - else: + elif device.type == 'cuda': # DataParallel will divide and allocate batch_size to all available GPUs if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): model.features = torch.nn.DataParallel(model.features) model.cuda() else: model = torch.nn.DataParallel(model).cuda() - - if torch.cuda.is_available(): - if args.gpu: - device = torch.device('cuda:{}'.format(args.gpu)) - else: - device = torch.device("cuda") - elif torch.backends.mps.is_available(): - device = torch.device("mps") else: - device = torch.device("cpu") + model.to(device) + + # define loss function (criterion), optimizer, and learning rate scheduler criterion = nn.CrossEntropyLoss().to(device) @@ -207,9 +213,9 @@ def main_worker(gpu, ngpus_per_node, args): print("=> loading checkpoint '{}'".format(args.resume)) if args.gpu is None: checkpoint = torch.load(args.resume) - elif torch.cuda.is_available(): + else: # Map model to be loaded to specified single gpu. 
- loc = 'cuda:{}'.format(args.gpu) + loc = f'{device.type}:{args.gpu}' checkpoint = torch.load(args.resume, map_location=loc) args.start_epoch = checkpoint['epoch'] best_acc1 = checkpoint['best_acc1'] @@ -302,11 +308,14 @@ def main_worker(gpu, ngpus_per_node, args): def train(train_loader, model, criterion, optimizer, epoch, device, args): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') + + use_accel = not args.no_accel and torch.accelerator.is_available() + + batch_time = AverageMeter('Time', use_accel, ':6.3f', Summary.NONE) + data_time = AverageMeter('Data', use_accel, ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', use_accel, ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', use_accel, ':6.2f', Summary.NONE) + top5 = AverageMeter('Acc@5', use_accel, ':6.2f', Summary.NONE) progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], @@ -349,18 +358,27 @@ def train(train_loader, model, criterion, optimizer, epoch, device, args): def validate(val_loader, model, criterion, args): + use_accel = not args.no_accel and torch.accelerator.is_available() + def run_validate(loader, base_progress=0): + + if use_accel: + device = torch.accelerator.current_accelerator() + else: + device = torch.device("cpu") + with torch.no_grad(): end = time.time() for i, (images, target) in enumerate(loader): i = base_progress + i - if args.gpu is not None and torch.cuda.is_available(): - images = images.cuda(args.gpu, non_blocking=True) - if torch.backends.mps.is_available(): - images = images.to('mps') - target = target.to('mps') - if torch.cuda.is_available(): - target = target.cuda(args.gpu, non_blocking=True) + if use_accel: + if args.gpu is not None and device.type=='cuda': + torch.accelerator.set_device_index(args.gpu) + images = images.cuda(args.gpu, non_blocking=True) + target = 
target.cuda(args.gpu, non_blocking=True) + else: + images = images.to(device) + target = target.to(device) # compute output output = model(images) @@ -379,10 +397,10 @@ def run_validate(loader, base_progress=0): if i % args.print_freq == 0: progress.display(i + 1) - batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) - losses = AverageMeter('Loss', ':.4e', Summary.NONE) - top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) - top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) + batch_time = AverageMeter('Time', use_accel, ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', use_accel, ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', use_accel, ':6.2f', Summary.AVERAGE) + top5 = AverageMeter('Acc@5', use_accel, ':6.2f', Summary.AVERAGE) progress = ProgressMeter( len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset))), [batch_time, losses, top1, top5], @@ -422,8 +440,9 @@ class Summary(Enum): class AverageMeter(object): """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE): + def __init__(self, name, use_accel, fmt=':f', summary_type=Summary.AVERAGE): self.name = name + self.use_accel = use_accel self.fmt = fmt self.summary_type = summary_type self.reset() @@ -440,11 +459,9 @@ def update(self, val, n=1): self.count += n self.avg = self.sum / self.count - def all_reduce(self): - if torch.cuda.is_available(): - device = torch.device("cuda") - elif torch.backends.mps.is_available(): - device = torch.device("mps") + def all_reduce(self): + if self.use_accel: + device = torch.accelerator.current_accelerator() else: device = torch.device("cpu") total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device) diff --git a/imagenet/requirements.txt b/imagenet/requirements.txt index 6cec7414dc..9a083ba390 100644 --- a/imagenet/requirements.txt +++ b/imagenet/requirements.txt @@ -1,2 +1,2 @@ -torch -torchvision==0.20.0 
+torch>=2.6 +torchvision