diff --git a/fast_neural_style/README.md b/fast_neural_style/README.md index 8057847214..c7fbe80320 100644 --- a/fast_neural_style/README.md +++ b/fast_neural_style/README.md @@ -26,8 +26,9 @@ python neural_style/neural_style.py eval --content-image 1 or args.multiprocessing_distributed - if torch.cuda.is_available(): - ngpus_per_node = torch.cuda.device_count() + use_accel = not args.no_accel and torch.accelerator.is_available() + + if use_accel: + device = torch.accelerator.current_accelerator() + else: + device = torch.device("cpu") + + print(f"Using device: {device}") + + if device.type =='cuda': + ngpus_per_node = torch.accelerator.device_count() if ngpus_per_node == 1 and args.dist_backend == "nccl": warnings.warn("nccl backend >=2.5 requires GPU count>1, see https://github.com/NVIDIA/nccl/issues/103 perhaps use 'gloo'") else: @@ -127,8 +138,15 @@ def main_worker(gpu, ngpus_per_node, args): global best_acc1 args.gpu = gpu - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) + use_accel = not args.no_accel and torch.accelerator.is_available() + + if use_accel: + if args.gpu is not None: + torch.accelerator.set_device_index(args.gpu) + print("Use GPU: {} for training".format(args.gpu)) + device = torch.accelerator.current_accelerator() + else: + device = torch.device("cpu") if args.distributed: if args.dist_url == "env://" and args.rank == -1: @@ -147,16 +165,16 @@ def main_worker(gpu, ngpus_per_node, args): print("=> creating model '{}'".format(args.arch)) model = models.__dict__[args.arch]() - if not torch.cuda.is_available() and not torch.backends.mps.is_available(): + if not use_accel: print('using CPU, this will be slow') elif args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, # DistributedDataParallel will use all available devices. 
- if torch.cuda.is_available(): + if device.type == 'cuda': if args.gpu is not None: torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) + model.cuda(device) # When using a single GPU per process and per # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs of the current node. @@ -168,29 +186,17 @@ def main_worker(gpu, ngpus_per_node, args): # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None and torch.cuda.is_available(): - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - elif torch.backends.mps.is_available(): - device = torch.device("mps") - model = model.to(device) - else: + elif device.type == 'cuda': # DataParallel will divide and allocate batch_size to all available GPUs if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): model.features = torch.nn.DataParallel(model.features) model.cuda() else: model = torch.nn.DataParallel(model).cuda() - - if torch.cuda.is_available(): - if args.gpu: - device = torch.device('cuda:{}'.format(args.gpu)) - else: - device = torch.device("cuda") - elif torch.backends.mps.is_available(): - device = torch.device("mps") else: - device = torch.device("cpu") + model.to(device) + + # define loss function (criterion), optimizer, and learning rate scheduler criterion = nn.CrossEntropyLoss().to(device) @@ -207,9 +213,9 @@ def main_worker(gpu, ngpus_per_node, args): print("=> loading checkpoint '{}'".format(args.resume)) if args.gpu is None: checkpoint = torch.load(args.resume) - elif torch.cuda.is_available(): + else: # Map model to be loaded to specified single gpu. 
- loc = 'cuda:{}'.format(args.gpu) + loc = f'{device.type}:{args.gpu}' checkpoint = torch.load(args.resume, map_location=loc) args.start_epoch = checkpoint['epoch'] best_acc1 = checkpoint['best_acc1'] @@ -302,11 +308,14 @@ def main_worker(gpu, ngpus_per_node, args): def train(train_loader, model, criterion, optimizer, epoch, device, args): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') + + use_accel = not args.no_accel and torch.accelerator.is_available() + + batch_time = AverageMeter('Time', use_accel, ':6.3f', Summary.NONE) + data_time = AverageMeter('Data', use_accel, ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', use_accel, ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', use_accel, ':6.2f', Summary.NONE) + top5 = AverageMeter('Acc@5', use_accel, ':6.2f', Summary.NONE) progress = ProgressMeter( len(train_loader), [batch_time, data_time, losses, top1, top5], @@ -349,18 +358,27 @@ def train(train_loader, model, criterion, optimizer, epoch, device, args): def validate(val_loader, model, criterion, args): + use_accel = not args.no_accel and torch.accelerator.is_available() + def run_validate(loader, base_progress=0): + + if use_accel: + device = torch.accelerator.current_accelerator() + else: + device = torch.device("cpu") + with torch.no_grad(): end = time.time() for i, (images, target) in enumerate(loader): i = base_progress + i - if args.gpu is not None and torch.cuda.is_available(): - images = images.cuda(args.gpu, non_blocking=True) - if torch.backends.mps.is_available(): - images = images.to('mps') - target = target.to('mps') - if torch.cuda.is_available(): - target = target.cuda(args.gpu, non_blocking=True) + if use_accel: + if args.gpu is not None and device.type=='cuda': + torch.accelerator.set_device_index(args.gpu) + images = images.cuda(args.gpu, non_blocking=True) + target =
target.cuda(args.gpu, non_blocking=True) + else: + images = images.to(device) + target = target.to(device) # compute output output = model(images) @@ -379,10 +397,10 @@ def run_validate(loader, base_progress=0): if i % args.print_freq == 0: progress.display(i + 1) - batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) - losses = AverageMeter('Loss', ':.4e', Summary.NONE) - top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) - top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) + batch_time = AverageMeter('Time', use_accel, ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', use_accel, ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', use_accel, ':6.2f', Summary.AVERAGE) + top5 = AverageMeter('Acc@5', use_accel, ':6.2f', Summary.AVERAGE) progress = ProgressMeter( len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset))), [batch_time, losses, top1, top5], @@ -422,8 +440,9 @@ class Summary(Enum): class AverageMeter(object): """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE): + def __init__(self, name, use_accel, fmt=':f', summary_type=Summary.AVERAGE): self.name = name + self.use_accel = use_accel self.fmt = fmt self.summary_type = summary_type self.reset() @@ -440,11 +459,9 @@ def update(self, val, n=1): self.count += n self.avg = self.sum / self.count - def all_reduce(self): - if torch.cuda.is_available(): - device = torch.device("cuda") - elif torch.backends.mps.is_available(): - device = torch.device("mps") + def all_reduce(self): + if self.use_accel: + device = torch.accelerator.current_accelerator() else: device = torch.device("cpu") total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device) diff --git a/imagenet/requirements.txt b/imagenet/requirements.txt index 6cec7414dc..9a083ba390 100644 --- a/imagenet/requirements.txt +++ b/imagenet/requirements.txt @@ -1,2 +1,2 @@ -torch -torchvision==0.20.0
+torch>=2.6 +torchvision diff --git a/mnist/main.py b/mnist/main.py index 184dc4744f..09487639d4 100644 --- a/mnist/main.py +++ b/mnist/main.py @@ -82,21 +82,24 @@ def main(): help='learning rate (default: 1.0)') parser.add_argument('--gamma', type=float, default=0.7, metavar='M', help='Learning rate step gamma (default: 0.7)') - parser.add_argument('--no-cuda', action='store_true', default=False, + parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') - parser.add_argument('--no-mps', action='store_true', default=False, + parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--no-xpu', action='store_true', + help='disables Intel GPU training') + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') - parser.add_argument('--save-model', action='store_true', default=False, + parser.add_argument('--save-model', action='store_true', help='For Saving the current Model') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() torch.manual_seed(args.seed) @@ -104,6 +107,8 @@ def main(): device = torch.device("cuda") elif use_mps: device = torch.device("mps") + elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") diff --git a/mnist_forward_forward/README.md b/mnist_forward_forward/README.md index f6ae12e56d..8857c9a6fb 100644 --- a/mnist_forward_forward/README.md +++ b/mnist_forward_forward/README.md @@ -18,6 +18,7 @@ optional arguments: --lr LR learning rate (default: 0.03)
--no_cuda disables CUDA training --no_mps disables MPS training + --no_xpu disables XPU training --seed SEED random seed (default: 1) --save_model For saving the current Model --train_size TRAIN_SIZE diff --git a/mnist_forward_forward/main.py b/mnist_forward_forward/main.py index a175126067..e6c2902ed8 100644 --- a/mnist_forward_forward/main.py +++ b/mnist_forward_forward/main.py @@ -102,10 +102,13 @@ def train(self, x_pos, x_neg): help="learning rate (default: 0.03)", ) parser.add_argument( - "--no_cuda", action="store_true", default=False, help="disables CUDA training" + "--no_cuda", action="store_true", help="disables CUDA training" ) parser.add_argument( - "--no_mps", action="store_true", default=False, help="disables MPS training" + "--no_mps", action="store_true", help="disables MPS training" + ) + parser.add_argument( + "--no_xpu", action="store_true", help="disables XPU training" ) parser.add_argument( "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)" @@ -113,7 +116,6 @@ def train(self, x_pos, x_neg): parser.add_argument( "--save_model", action="store_true", - default=False, help="For saving the current Model", ) parser.add_argument( @@ -126,7 +128,6 @@ def train(self, x_pos, x_neg): parser.add_argument( "--save-model", action="store_true", - default=False, help="For Saving the current Model", ) parser.add_argument( @@ -139,10 +140,13 @@ def train(self, x_pos, x_neg): args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() if use_cuda: device = torch.device("cuda") elif use_mps: device = torch.device("mps") + elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") diff --git a/mnist_rnn/README.md b/mnist_rnn/README.md index c879cb367f..ba63513711 100644 --- a/mnist_rnn/README.md +++ b/mnist_rnn/README.md @@ -8,3 +8,20 @@ pip install -r requirements.txt python 
main.py # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 ``` + +```bash +optional arguments: + -h, --help show this help message and exit + --batch_size input batch size for training (default: 64) + --testing_batch_size input batch size for testing (default: 1000) + --epochs EPOCHS number of epochs to train (default: 14) + --lr LR learning rate (default: 0.1) + --gamma learning rate step gamma (default: 0.7) + --cuda enables CUDA training + --xpu enables XPU training + --mps enables macOS GPU training + --seed SEED random seed (default: 1) + --save-model For saving the current Model + --log-interval how many batches to wait before logging training status + --dry-run quickly check a single pass +``` \ No newline at end of file diff --git a/mnist_rnn/main.py b/mnist_rnn/main.py index 2fa64c00d6..f6c1ff3d48 100644 --- a/mnist_rnn/main.py +++ b/mnist_rnn/main.py @@ -93,15 +93,17 @@ def main(): help='learning rate step gamma (default: 0.7)') parser.add_argument('--cuda', action='store_true', default=False, help='enables CUDA training') - parser.add_argument('--mps', action="store_true", default=False, + parser.add_argument('--mps', action="store_true", help="enables MPS training") - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--xpu', action='store_true', + help='enables XPU training') + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') - parser.add_argument('--save-model', action='store_true', default=False, + parser.add_argument('--save-model', action='store_true', help='for Saving the current Model') args = parser.parse_args() @@ -109,6 +111,8 @@ def main(): device = "cuda" elif args.mps and not args.cuda: device = "mps" + elif
args.xpu: + device = "xpu" else: device = "cpu" diff --git a/siamese_network/README.md b/siamese_network/README.md index 973a0414a4..19b19f0e76 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -1,7 +1,42 @@ # Siamese Network Example +Siamese network for image similarity estimation. +The network is composed of two identical networks, one for each input. +The output of each network is concatenated and passed to a linear layer. +The output of the linear layer is passed through a sigmoid function. +[FaceNet](https://arxiv.org/pdf/1503.03832.pdf) is a variant of the Siamese network. +This implementation varies from FaceNet as we use the `ResNet-18` model from +[Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) as our feature extractor. +In addition, we aren't using `TripletLoss` as the MNIST dataset is simple, so `BCELoss` can do the trick. + ```bash pip install -r requirements.txt python main.py -# CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 ``` + +Optionally, you can add the following arguments to customize your execution. + +```bash +--batch-size input batch size for training (default: 64) +--test-batch-size input batch size for testing (default: 1000) +--epochs number of epochs to train (default: 14) +--lr learning rate (default: 1.0) +--gamma learning rate step gamma (default: 0.7) +--no-cuda disables CUDA training +--no-xpu disables XPU training +--no-mps disables macOS GPU training +--dry-run quickly check a single pass +--seed random seed (default: 1) +--log-interval how many batches to wait before logging training status +--save-model Saving the current Model +``` + +If a GPU device (CUDA, XPU, or MPS) is detected, the example will be executed on the GPU by default; otherwise, it will run on the CPU. + +To disable the GPU option, add the appropriate argument to the command.
For example: + +```bash +python main.py --no-xpu +``` + +This command will execute the example on the CPU even if your system successfully detects an XPU. diff --git a/siamese_network/main.py b/siamese_network/main.py index 8f420a9b01..6bd55235e2 100644 --- a/siamese_network/main.py +++ b/siamese_network/main.py @@ -247,32 +247,39 @@ def main(): help='learning rate (default: 1.0)') parser.add_argument('--gamma', type=float, default=0.7, metavar='M', help='Learning rate step gamma (default: 0.7)') - parser.add_argument('--no-cuda', action='store_true', default=False, + parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') - parser.add_argument('--no-mps', action='store_true', default=False, + parser.add_argument('--no-xpu', action='store_true', + help='disables XPU training') + parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') - parser.add_argument('--dry-run', action='store_true', default=False, + parser.add_argument('--dry-run', action='store_true', help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') - parser.add_argument('--save-model', action='store_true', default=False, + parser.add_argument('--save-model', action='store_true', help='For Saving the current Model') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() torch.manual_seed(args.seed) if use_cuda: device = torch.device("cuda") + elif use_xpu: + device = torch.device("xpu") elif use_mps: device = torch.device("mps") else: device = torch.device("cpu") + print('Device to use: ', device) + train_kwargs = {'batch_size': args.batch_size} test_kwargs = {'batch_size': 
args.test_batch_size} if use_cuda: diff --git a/vae/README.md b/vae/README.md index cda6a33672..e2a432fd1e 100644 --- a/vae/README.md +++ b/vae/README.md @@ -14,8 +14,9 @@ The main.py script accepts the following arguments: optional arguments: --batch-size input batch size for training (default: 128) --epochs number of epochs to train (default: 10) - --no-cuda enables CUDA training - --mps enables GPU on macOS + --no-cuda disables CUDA training + --no-mps disables GPU on macOS + --no-xpu disables XPU training on Intel GPUs --seed random seed (default: 1) --log-interval how many batches to wait before logging training status -``` \ No newline at end of file +``` diff --git a/vae/main.py b/vae/main.py index d69833fbe0..f7915b9ced 100644 --- a/vae/main.py +++ b/vae/main.py @@ -13,10 +13,12 @@ help='input batch size for training (default: 128)') parser.add_argument('--epochs', type=int, default=10, metavar='N', help='number of epochs to train (default: 10)') -parser.add_argument('--no-cuda', action='store_true', default=False, +parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training') -parser.add_argument('--no-mps', action='store_true', default=False, +parser.add_argument('--no-mps', action='store_true', help='disables macOS GPU training') +parser.add_argument('--no-xpu', action='store_true', + help='disables Intel XPU training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', @@ -24,6 +26,7 @@ args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() +use_xpu = not args.no_xpu and torch.xpu.is_available() torch.manual_seed(args.seed) @@ -31,9 +34,13 @@ device = torch.device("cuda") elif use_mps: device = torch.device("mps") +elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") +print('Device to use: ',
device) + kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} train_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=True, download=True,