From 62e7625838d22180d4238b36708776f16dbc1ce0 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 12 Jun 2026 22:26:39 +0200 Subject: [PATCH 1/2] Fix wave training script imports --- gcp/wave_pytorch_gcp.py | 51 +++++++++++++++++++------- train.py | 80 +++++++++++++++++++++++++++++++---------- train_tf.py | 54 ++++++++++++++++++++-------- utils/utils.py | 38 +++++++++++++++----- 4 files changed, 171 insertions(+), 52 deletions(-) diff --git a/gcp/wave_pytorch_gcp.py b/gcp/wave_pytorch_gcp.py index 0fb90e8..b1f9def 100644 --- a/gcp/wave_pytorch_gcp.py +++ b/gcp/wave_pytorch_gcp.py @@ -6,12 +6,15 @@ import scipy.io import torch +import numpy as np -from utils import * +from utils.utils import normalize, splitdata, stdpt # set printoptions torch.set_printoptions(linewidth=320, precision=8) -np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format}) # format short g, %precision=5 +np.set_printoptions( + linewidth=320, formatter={"float_kind": "{:11.5g}".format} +) # format short g, %precision=5 pathd = "data/" pathr = "results/" @@ -30,24 +33,32 @@ def runexample(H, model, str, lr=0.001, amsgrad=False): cuda = torch.cuda.is_available() os.makedirs(f"{pathr}models", exist_ok=True) - name = f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".").replace("[", "_").replace("]", "_") + name = ( + f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".") + .replace("[", "_") + .replace("]", "_") + ) tica = time.time() device = torch.device("cuda:0" if cuda else "cpu") - print(f"Running {name} on {device.type}\n{torch.cuda.get_device_properties(0) if cuda else ''}") + print( + f"Running {name} on {device.type}\n{torch.cuda.get_device_properties(0) if cuda else ''}" + ) if not os.path.isfile(pathd + data): os.system(f"wget -P data/ https://storage.googleapis.com/ultralytics/{data}") mat = scipy.io.loadmat(pathd + data) x = mat["inputs"] # inputs (nx512) [waveform1 waveform2] y = mat["outputs"][:, 1:2] # outputs (nx4) [position(mm), time(ns), PE, E(MeV)] - nz, nx = x.shape + _nz, _nx = x.shape ny = y.shape[1] x, _, _ = normalize(x, 1) # normalize each input row - y, ymu, ys = normalize(y, 0) # normalize each output column + y, _ymu, ys = normalize(y, 0) # normalize each output column x, y = torch.Tensor(x), torch.Tensor(y) - x, y, xv, yv, xt, yt = splitdata(x, y, train=0.70, validate=0.15, test=0.15, shuffle=True) + x, y, xv, yv, xt, yt = splitdata( + x, y, train=0.70, validate=0.15, test=0.15, shuffle=True + ) labels = ["train", "validate", "test"] print(model) @@ -91,12 +102,17 @@ def runexample(H, model, str, lr=0.001, amsgrad=False): loss.backward() optimizer.step() else: - print("WARNING: Validation loss still decreasing after %g epochs (train longer)." % (i + 1)) + print( + "WARNING: Validation loss still decreasing after %g epochs (train longer)." + % (i + 1) + ) # torch.save(best[2], pathr + 'models/' + name + '.pt') model.load_state_dict(best[2]) dt = time.time() - tica - print(f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:") + print( + f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:" + ) loss, std = np.zeros(3), np.zeros((3, ny)) for i, (xi, yi) in enumerate(((x, y), (xv, yv), (xt, yt))): loss[i], std[i] = stdpt(model(xi) - yi, ys) @@ -215,7 +231,9 @@ def tslr(): # TS learning rate tsv = np.logspace(-5, -2, 13) tsy = [] for a in tsv: - tsy.extend(runexample(H, model=WAVE(H), str=("." + "Tanh"), lr=a) for _ in range(10)) + tsy.extend( + runexample(H, model=WAVE(H), str=("." + "Tanh"), lr=a) for _ in range(10) + ) scipy.io.savemat(f"{pathr}TS.lr.mat", dict(tsv=tsv, tsy=np.array(tsy))) @@ -224,7 +242,10 @@ def tsams(): # TS AMSgrad tsv = [False, True] tsy = [] for a in tsv: - tsy.extend(runexample(H, model=WAVE(H), str=f".TanhAMS{str(a)}", amsgrad=a) for _ in range(3)) + tsy.extend( + runexample(H, model=WAVE(H), str=f".TanhAMS{a!s}", amsgrad=a) + for _ in range(3) + ) scipy.io.savemat(f"{pathr}TS.AMSgrad.mat", dict(tsv=tsv, tsy=np.array(tsy))) @@ -243,7 +264,13 @@ def tsshape(): # TS network shape # tsv = ['Tanh', 'LogSigmoid', 'Softsign', 'ELU'] # tsv = np.logspace(-4, -2, 11) - tsv = [[512, 23, 1], [512, 64, 8, 1], [512, 108, 23, 5, 1], [512, 147, 42, 12, 3, 1], [512, 181, 64, 23, 8, 3, 1]] + tsv = [ + [512, 23, 1], + [512, 64, 8, 1], + [512, 108, 23, 5, 1], + [512, 147, 42, 12, 3, 1], + [512, 181, 64, 23, 8, 3, 1], + ] H = tsv[0] class WAVE(torch.nn.Module): diff --git a/train.py b/train.py index 910bb8e..56a3694 100644 --- a/train.py +++ b/train.py @@ -3,11 +3,13 @@ import argparse import os +import numpy as np import scipy.io +import torch import torch.nn as nn -from utils.torch_utils import * -from utils.utils import * +from utils.torch_utils import init_seeds, select_device +from utils.utils import model_info, normalize, patienceStopper, splitdata torch.backends.cudnn.benchmark = True # unsuitable for multiscale @@ -25,7 +27,11 @@ def train(H, model, str, lr=0.001): cuda = torch.cuda.is_available() os.makedirs(f"{pathr}models", exist_ok=True) - name = f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".").replace("[", "_").replace("]", "_") + name = ( + f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".") + .replace("[", "_") + .replace("]", "_") + ) print(f"Running {name}") device = select_device() @@ -35,13 +41,15 @@ def train(H, model, str, lr=0.001): mat = scipy.io.loadmat(pathd + data) x = mat["inputs"][:] # inputs (nx512) [waveform1 waveform2] y = mat["outputs"][:, 0:2] # outputs (nx4) [position(mm), time(ns), PE, E(MeV)] - nz, nx = x.shape + _nz, _nx = x.shape ny = y.shape[1] x, _, _ = normalize(x, 1) # normalize each input row - y, ymu, ys = normalize(y, 0) # normalize each output column + y, _ymu, ys = normalize(y, 0) # normalize each output column x, y = torch.Tensor(x), torch.Tensor(y) - x, y, xv, yv, xt, yt = splitdata(x, y, train=0.70, validate=0.15, test=0.15, shuffle=False) + x, y, xv, yv, xt, yt = splitdata( + x, y, train=0.70, validate=0.15, test=0.15, shuffle=False + ) # torch.nn.init.constant_(model.out.weight.data, ys.item(0)) # torch.nn.init.constant_(model.out.bias.data, ymu.item(0)) @@ -116,7 +124,10 @@ def train(H, model, str, lr=0.001): std[i] = r.std(0).cpu().numpy() * ys print(f"{loss[i]:.5f} {std[i, :]} {labels[i]}") - scipy.io.savemat(pathr + name + ".mat", dict(bestepoch=stopper.bestloss, loss=loss, std=std, L=L, name=name)) + scipy.io.savemat( + pathr + name + ".mat", + dict(bestepoch=stopper.bestloss, loss=loss, std=std, L=L, name=name), + ) # files.download(pathr + name + '.mat') return np.concatenate(([stopper.bestloss], np.array(loss), np.array(std.ravel()))) @@ -149,18 +160,24 @@ def __init__(self, n_out=2): """Initializes the WAVE4 model with specified output layers and configurations for convolutional layers.""" super().__init__() self.layer1 = nn.Sequential( - nn.Conv2d(1, 32, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False), + nn.Conv2d( + 1, 32, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False + ), nn.BatchNorm2d(32), nn.LeakyReLU(0.1), ) # nn.MaxPool2d(kernel_size=(1, 2), stride=1)) self.layer2 = nn.Sequential( - nn.Conv2d(32, 64, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False), + nn.Conv2d( + 32, 64, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False + ), nn.BatchNorm2d(64), nn.LeakyReLU(0.1), ) # nn.MaxPool2d(kernel_size=(1, 2), stride=1)) - self.layer3 = nn.Conv2d(64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0)) + self.layer3 = nn.Conv2d( + 64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0) + ) def forward(self, x): # x.shape = [bs, 512] """Forward pass for processing input tensor through convolutional layers and reshaping output for @@ -185,20 +202,37 @@ def __init__(self, n_out=2): super().__init__() n = 32 self.layer1 = nn.Sequential( - nn.Conv2d(in_channels=2, out_channels=n, kernel_size=(1, 33), stride=(1, 2), padding=(0, 16), bias=False), + nn.Conv2d( + in_channels=2, + out_channels=n, + kernel_size=(1, 33), + stride=(1, 2), + padding=(0, 16), + bias=False, + ), nn.BatchNorm2d(n), nn.LeakyReLU(0.1), ) self.layer2 = nn.Sequential( nn.Conv2d( - in_channels=n, out_channels=n * 2, kernel_size=(1, 17), stride=(1, 2), padding=(0, 8), bias=False + in_channels=n, + out_channels=n * 2, + kernel_size=(1, 17), + stride=(1, 2), + padding=(0, 8), + bias=False, ), nn.BatchNorm2d(n * 2), nn.LeakyReLU(0.1), ) self.layer3 = nn.Sequential( nn.Conv2d( - in_channels=n * 2, out_channels=n * 4, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False + in_channels=n * 2, + out_channels=n * 4, + kernel_size=(1, 9), + stride=(1, 2), + padding=(0, 4), + bias=False, ), nn.BatchNorm2d(n * 4), nn.LeakyReLU(0.1), @@ -229,18 +263,24 @@ def __init__(self, n_out=2): """Initializes the WAVE2 model architecture components.""" super().__init__() self.layer1 = nn.Sequential( - nn.Conv2d(1, 32, kernel_size=(2, 30), stride=(1, 2), padding=(1, 15), bias=False), + nn.Conv2d( + 1, 32, kernel_size=(2, 30), stride=(1, 2), padding=(1, 15), bias=False + ), nn.BatchNorm2d(32), nn.LeakyReLU(0.1), nn.MaxPool2d(kernel_size=(1, 2), stride=1), ) self.layer2 = nn.Sequential( - nn.Conv2d(32, 64, kernel_size=(2, 30), stride=(1, 2), padding=(0, 15), bias=False), + nn.Conv2d( + 32, 64, kernel_size=(2, 30), stride=(1, 2), padding=(0, 15), bias=False + ), nn.BatchNorm2d(64), nn.LeakyReLU(0.1), nn.MaxPool2d(kernel_size=(1, 2), stride=1), ) - self.layer3 = nn.Sequential(nn.Conv2d(64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0))) + self.layer3 = nn.Sequential( + nn.Conv2d(64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0)) + ) def forward(self, x): # x.shape = [bs, 512] """Forward pass for processing input tensor x through sequential layers, reshaping as needed for the model.""" @@ -257,8 +297,12 @@ def forward(self, x): # x.shape = [bs, 512] if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--epochs", type=int, default=5000, help="number of epochs") - parser.add_argument("--batch-size", type=int, default=2000, help="size of each image batch") - parser.add_argument("--printerval", type=int, default=1, help="print results interval") + parser.add_argument( + "--batch-size", type=int, default=2000, help="size of each image batch" + ) + parser.add_argument( + "--printerval", type=int, default=1, help="print results interval" + ) parser.add_argument("--var", nargs="+", default=[3], help="debug list") opt = parser.parse_args() opt.var = [float(x) for x in opt.var] diff --git a/train_tf.py b/train_tf.py index aa41c65..0b32ebf 100644 --- a/train_tf.py +++ b/train_tf.py @@ -3,12 +3,13 @@ import os import time +import numpy as np import plotly.graph_objs as go import scipy.io import tensorflow as tf from plotly.offline import plot -from utils.utils import * +from utils.utils import normalize, splitdata, stdtf tf.enable_eager_execution() @@ -27,7 +28,11 @@ def runexample(H, model, str): tf.set_random_seed(1) path = "data/" os.makedirs(f"{path}models", exist_ok=True) - name = f"{data[:-4]}{H[:]}{lr:g}lr{eps:g}eps{str}".replace(", ", "_").replace("[", "_").replace("]", "_") + name = ( + f"{data[:-4]}{H[:]}{lr:g}lr{eps:g}eps{str}".replace(", ", "_") + .replace("[", "_") + .replace("]", "_") + ) tica = time.time() device = "/gpu:0" if cuda else "/cpu:0" @@ -38,14 +43,16 @@ def runexample(H, model, str): mat = scipy.io.loadmat(path + data) x = mat["inputs"] # inputs (nx512) [waveform1 waveform2] y = mat["outputs"][:, 0:2] # outputs (nx4) [position(mm), time(ns), PE, E(MeV)] - nz, nx = x.shape + _nz, _nx = x.shape ny = y.shape[1] if model is None: # model = WAVE(nx, ny, H) model = tf.keras.Sequential( [ - tf.keras.layers.Dense(H[0], activation=tf.tanh, input_shape=(512,)), # must declare input shape + tf.keras.layers.Dense( + H[0], activation=tf.tanh, input_shape=(512,) + ), # must declare input shape tf.keras.layers.Dense(H[1], activation=tf.tanh), tf.keras.layers.Dense( H[2], @@ -56,8 +63,10 @@ def runexample(H, model, str): ) x, _, _ = normalize(x, 1) # normalize each input row - y, ymu, ys = normalize(y, 0) # normalize each output column - x, y, xv, yv, xt, yt = splitdata(x, y, train=0.70, validate=0.15, test=0.15, shuffle=False) + y, _ymu, ys = normalize(y, 0) # normalize each output column + x, y, xv, yv, xt, yt = splitdata( + x, y, train=0.70, validate=0.15, test=0.15, shuffle=False + ) labels = ["train", "validate", "test"] print(model) @@ -82,7 +91,9 @@ def criteria(y_pred, y): # MSE with tf.GradientTape() as tape: y_pred = model(x) loss = criteria(y_pred, y) - grads = tape.gradient(loss, model.variables) # DO NOT INDENT, not inside tf.GradientTape context manager + grads = tape.gradient( + loss, model.variables + ) # DO NOT INDENT, not inside tf.GradientTape context manager y_predv = model(xv) # Compute and print loss @@ -94,7 +105,9 @@ def criteria(y_pred, y): # MSE if L[i, 1] < best[1]: best = (i, L[i, 1], None) if (i - best[0]) > validations: - print(f"\n{validations:g} validation checks exceeded at epoch {i:g}.") + print( + f"\n{validations:g} validation checks exceeded at epoch {i:g}." + ) break if i % printInterval == 0: # print and save progress @@ -104,14 +117,22 @@ def criteria(y_pred, y): # MSE ticb = time.time() # Apply the gradient to the model - optimizer.apply_gradients(zip(grads, model.variables), global_step=tf.train.get_or_create_global_step()) + optimizer.apply_gradients( + zip(grads, model.variables), + global_step=tf.train.get_or_create_global_step(), + ) else: - print("WARNING: Validation loss still decreasing after %g epochs (train longer)." % (i + 1)) + print( + "WARNING: Validation loss still decreasing after %g epochs (train longer)." + % (i + 1) + ) # torch.save(best[2], path + 'models/' + name + '.pt') # model.load_state_dict(best[2]) dt = time.time() - tica - print(f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:") + print( + f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:" + ) loss, std = np.zeros(3), np.zeros((3, ny)) for i, (xi, yi) in enumerate(((x, y), (xv, yv), (xt, yt))): loss[i], std[i] = stdtf(model(xi) - yi, ys) @@ -121,8 +142,13 @@ def criteria(y_pred, y): # MSE data = [] for i, s in enumerate(labels): - data.append(go.Scatter(x=np.arange(epochs), y=L[:, i], mode="markers+lines", name=s)) - layout = go.Layout(xaxis=dict(type="linear", autorange=True), yaxis=dict(type="log", autorange=True)) + data.append( + go.Scatter(x=np.arange(epochs), y=L[:, i], mode="markers+lines", name=s) + ) + layout = go.Layout( + xaxis=dict(type="linear", autorange=True), + yaxis=dict(type="log", autorange=True), + ) # configure_plotly_browser_state() plot(go.Figure(data=data, layout=layout)) @@ -130,4 +156,4 @@ def criteria(y_pred, y): # MSE if __name__ == "__main__": H = [128, 32, 8] for i in range(1): - runexample(H, None, f".{str(i)}") + runexample(H, None, f".{i!s}") diff --git a/utils/utils.py b/utils/utils.py index 49de949..3a3784b 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -8,7 +8,9 @@ # Set printoptions torch.set_printoptions(linewidth=1320, precision=5, profile="long") -np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format}) # format short g, %precision=5 +np.set_printoptions( + linewidth=320, formatter={"float_kind": "{:11.5g}".format} +) # format short g, %precision=5 def normalize(x, axis=None): # normalize x mean and std by axis @@ -30,7 +32,9 @@ def shuffledata(x, y): # randomly shuffle x and y by same axis=0 indices return x[i], y[i] -def splitdata(x, y, train=0.7, validate=0.15, test=0.15, shuffle=False): # split training data +def splitdata( + x, y, train=0.7, validate=0.15, test=0.15, shuffle=False +): # split training data """Splits data arrays x and y into training, validation, and test sets with optional shuffling.""" n = x.shape[0] if shuffle: @@ -38,7 +42,14 @@ def splitdata(x, y, train=0.7, validate=0.15, test=0.15, shuffle=False): # spli i = round(n * train) # train j = round(n * validate) + i # validate k = round(n * test) + j # test - return x[:i], y[:i], x[i:j], y[i:j], x[j:k], y[j:k] # xy train, xy validate, xy test + return ( + x[:i], + y[:i], + x[i:j], + y[i:j], + x[j:k], + y[j:k], + ) # xy train, xy validate, xy test def stdpt(r, ys): # MSE loss + standard deviation (pytorch) @@ -62,8 +73,13 @@ def model_info(model): shape, mean, and std. """ n_p = sum(x.numel() for x in model.parameters()) # number parameters - n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients - print("\n%5s %40s %9s %12s %20s %10s %10s" % ("layer", "name", "gradient", "parameters", "shape", "mu", "sigma")) + n_g = sum( + x.numel() for x in model.parameters() if x.requires_grad + ) # number gradients + print( + "\n%5s %40s %9s %12s %20s %10s %10s" + % ("layer", "name", "gradient", "parameters", "shape", "mu", "sigma") + ) for i, (name, p) in enumerate(model.named_parameters()): name = name.replace("module_list.", "") print( @@ -103,7 +119,9 @@ def step(self, loss, metrics=None, model=None): self.num_bad_epochs += 1 self.epoch += 1 self.first(model) if self.epoch == 0 else None - self.printepoch(self.epoch, loss, metrics) if self.epoch % self.printerval == 0 else None + self.printepoch( + self.epoch, loss, metrics + ) if self.epoch % self.printerval == 0 else None if loss < self.bestloss: self.bestloss = loss @@ -112,7 +130,9 @@ def step(self, loss, metrics=None, model=None): self.num_bad_epochs = 0 if model: if self.bestmodel: - self.bestmodel.load_state_dict(model.state_dict()) # faster than deepcopy + self.bestmodel.load_state_dict( + model.state_dict() + ) # faster than deepcopy else: self.bestmodel = copy.deepcopy(model) @@ -120,7 +140,9 @@ def step(self, loss, metrics=None, model=None): self.final(f"{self.patience:g} Patience exceeded at epoch {self.epoch:g}.") return True elif self.epoch >= self.epochs: - self.final(f"WARNING: {self.patience:g} Patience not exceeded by epoch {self.epoch:g} (train longer).") + self.final( + f"WARNING: {self.patience:g} Patience not exceeded by epoch {self.epoch:g} (train longer)." + ) return True else: return False From d813d0a365a8137ea7cc9a333e5ebe0d7088bd28 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Fri, 12 Jun 2026 20:28:21 +0000 Subject: [PATCH 2/2] Auto-format by https://ultralytics.com/actions --- README.md | 1 + gcp/wave_pytorch_gcp.py | 41 ++++++++++------------------------------ train.py | 42 ++++++++++------------------------------- train_tf.py | 35 ++++++++-------------------------- utils/utils.py | 29 +++++++--------------------- 5 files changed, 36 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index bd4d130..91cf4b3 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Before diving into waveform vector exploitation with WAVE, ensure your environme ```bash python -m pip install -U -r requirements.txt ``` + - `numpy` - `scipy` - `torch` diff --git a/gcp/wave_pytorch_gcp.py b/gcp/wave_pytorch_gcp.py index b1f9def..5c9baa9 100644 --- a/gcp/wave_pytorch_gcp.py +++ b/gcp/wave_pytorch_gcp.py @@ -4,17 +4,15 @@ import os import time +import numpy as np import scipy.io import torch -import numpy as np from utils.utils import normalize, splitdata, stdpt # set printoptions torch.set_printoptions(linewidth=320, precision=8) -np.set_printoptions( - linewidth=320, formatter={"float_kind": "{:11.5g}".format} -) # format short g, %precision=5 +np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format}) # format short g, %precision=5 pathd = "data/" pathr = "results/" @@ -33,17 +31,11 @@ def runexample(H, model, str, lr=0.001, amsgrad=False): cuda = torch.cuda.is_available() os.makedirs(f"{pathr}models", exist_ok=True) - name = ( - f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".") - .replace("[", "_") - .replace("]", "_") - ) + name = f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".").replace("[", "_").replace("]", "_") tica = time.time() device = torch.device("cuda:0" if cuda else "cpu") - print( - f"Running {name} on {device.type}\n{torch.cuda.get_device_properties(0) if cuda else ''}" - ) + print(f"Running {name} on {device.type}\n{torch.cuda.get_device_properties(0) if cuda else ''}") if not os.path.isfile(pathd + data): os.system(f"wget -P data/ https://storage.googleapis.com/ultralytics/{data}") @@ -56,9 +48,7 @@ def runexample(H, model, str, lr=0.001, amsgrad=False): x, _, _ = normalize(x, 1) # normalize each input row y, _ymu, ys = normalize(y, 0) # normalize each output column x, y = torch.Tensor(x), torch.Tensor(y) - x, y, xv, yv, xt, yt = splitdata( - x, y, train=0.70, validate=0.15, test=0.15, shuffle=True - ) + x, y, xv, yv, xt, yt = splitdata(x, y, train=0.70, validate=0.15, test=0.15, shuffle=True) labels = ["train", "validate", "test"] print(model) @@ -102,17 +92,12 @@ def runexample(H, model, str, lr=0.001, amsgrad=False): loss.backward() optimizer.step() else: - print( - "WARNING: Validation loss still decreasing after %g epochs (train longer)." - % (i + 1) - ) + print("WARNING: Validation loss still decreasing after %g epochs (train longer)." % (i + 1)) # torch.save(best[2], pathr + 'models/' + name + '.pt') model.load_state_dict(best[2]) dt = time.time() - tica - print( - f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:" - ) + print(f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:") loss, std = np.zeros(3), np.zeros((3, ny)) for i, (xi, yi) in enumerate(((x, y), (xv, yv), (xt, yt))): loss[i], std[i] = stdpt(model(xi) - yi, ys) @@ -225,15 +210,12 @@ def forward(self, x): def tslr(): # TS learning rate - """Generate and save learning rate (LR) logs for time-series models with varying LRs using WAVE and TanH - activation. + """Generate and save learning rate (LR) logs for time-series models with varying LRs using WAVE and TanH activation. """ tsv = np.logspace(-5, -2, 13) tsy = [] for a in tsv: - tsy.extend( - runexample(H, model=WAVE(H), str=("." + "Tanh"), lr=a) for _ in range(10) - ) + tsy.extend(runexample(H, model=WAVE(H), str=("." + "Tanh"), lr=a) for _ in range(10)) scipy.io.savemat(f"{pathr}TS.lr.mat", dict(tsv=tsv, tsy=np.array(tsy))) @@ -242,10 +224,7 @@ def tsams(): # TS AMSgrad tsv = [False, True] tsy = [] for a in tsv: - tsy.extend( - runexample(H, model=WAVE(H), str=f".TanhAMS{a!s}", amsgrad=a) - for _ in range(3) - ) + tsy.extend(runexample(H, model=WAVE(H), str=f".TanhAMS{a!s}", amsgrad=a) for _ in range(3)) scipy.io.savemat(f"{pathr}TS.AMSgrad.mat", dict(tsv=tsv, tsy=np.array(tsy))) diff --git a/train.py b/train.py index 56a3694..908ddfc 100644 --- a/train.py +++ b/train.py @@ -27,11 +27,7 @@ def train(H, model, str, lr=0.001): cuda = torch.cuda.is_available() os.makedirs(f"{pathr}models", exist_ok=True) - name = ( - f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".") - .replace("[", "_") - .replace("]", "_") - ) + name = f"{data[:-4]}{H[:]}{lr:g}lr{str}".replace(", ", ".").replace("[", "_").replace("]", "_") print(f"Running {name}") device = select_device() @@ -47,9 +43,7 @@ def train(H, model, str, lr=0.001): x, _, _ = normalize(x, 1) # normalize each input row y, _ymu, ys = normalize(y, 0) # normalize each output column x, y = torch.Tensor(x), torch.Tensor(y) - x, y, xv, yv, xt, yt = splitdata( - x, y, train=0.70, validate=0.15, test=0.15, shuffle=False - ) + x, y, xv, yv, xt, yt = splitdata(x, y, train=0.70, validate=0.15, test=0.15, shuffle=False) # torch.nn.init.constant_(model.out.weight.data, ys.item(0)) # torch.nn.init.constant_(model.out.bias.data, ymu.item(0)) @@ -160,24 +154,18 @@ def __init__(self, n_out=2): """Initializes the WAVE4 model with specified output layers and configurations for convolutional layers.""" super().__init__() self.layer1 = nn.Sequential( - nn.Conv2d( - 1, 32, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False - ), + nn.Conv2d(1, 32, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False), nn.BatchNorm2d(32), nn.LeakyReLU(0.1), ) # nn.MaxPool2d(kernel_size=(1, 2), stride=1)) self.layer2 = nn.Sequential( - nn.Conv2d( - 32, 64, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False - ), + nn.Conv2d(32, 64, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4), bias=False), nn.BatchNorm2d(64), nn.LeakyReLU(0.1), ) # nn.MaxPool2d(kernel_size=(1, 2), stride=1)) - self.layer3 = nn.Conv2d( - 64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0) - ) + self.layer3 = nn.Conv2d(64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0)) def forward(self, x): # x.shape = [bs, 512] """Forward pass for processing input tensor through convolutional layers and reshaping output for @@ -263,24 +251,18 @@ def __init__(self, n_out=2): """Initializes the WAVE2 model architecture components.""" super().__init__() self.layer1 = nn.Sequential( - nn.Conv2d( - 1, 32, kernel_size=(2, 30), stride=(1, 2), padding=(1, 15), bias=False - ), + nn.Conv2d(1, 32, kernel_size=(2, 30), stride=(1, 2), padding=(1, 15), bias=False), nn.BatchNorm2d(32), nn.LeakyReLU(0.1), nn.MaxPool2d(kernel_size=(1, 2), stride=1), ) self.layer2 = nn.Sequential( - nn.Conv2d( - 32, 64, kernel_size=(2, 30), stride=(1, 2), padding=(0, 15), bias=False - ), + nn.Conv2d(32, 64, kernel_size=(2, 30), stride=(1, 2), padding=(0, 15), bias=False), nn.BatchNorm2d(64), nn.LeakyReLU(0.1), nn.MaxPool2d(kernel_size=(1, 2), stride=1), ) - self.layer3 = nn.Sequential( - nn.Conv2d(64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0)) - ) + self.layer3 = nn.Sequential(nn.Conv2d(64, n_out, kernel_size=(2, 64), stride=(1, 1), padding=(0, 0))) def forward(self, x): # x.shape = [bs, 512] """Forward pass for processing input tensor x through sequential layers, reshaping as needed for the model.""" @@ -297,12 +279,8 @@ def forward(self, x): # x.shape = [bs, 512] if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--epochs", type=int, default=5000, help="number of epochs") - parser.add_argument( - "--batch-size", type=int, default=2000, help="size of each image batch" - ) - parser.add_argument( - "--printerval", type=int, default=1, help="print results interval" - ) + parser.add_argument("--batch-size", type=int, default=2000, help="size of each image batch") + parser.add_argument("--printerval", type=int, default=1, help="print results interval") parser.add_argument("--var", nargs="+", default=[3], help="debug list") opt = parser.parse_args() opt.var = [float(x) for x in opt.var] diff --git a/train_tf.py b/train_tf.py index 0b32ebf..304d378 100644 --- a/train_tf.py +++ b/train_tf.py @@ -28,11 +28,7 @@ def runexample(H, model, str): tf.set_random_seed(1) path = "data/" os.makedirs(f"{path}models", exist_ok=True) - name = ( - f"{data[:-4]}{H[:]}{lr:g}lr{eps:g}eps{str}".replace(", ", "_") - .replace("[", "_") - .replace("]", "_") - ) + name = f"{data[:-4]}{H[:]}{lr:g}lr{eps:g}eps{str}".replace(", ", "_").replace("[", "_").replace("]", "_") tica = time.time() device = "/gpu:0" if cuda else "/cpu:0" @@ -50,9 +46,7 @@ def runexample(H, model, str): # model = WAVE(nx, ny, H) model = tf.keras.Sequential( [ - tf.keras.layers.Dense( - H[0], activation=tf.tanh, input_shape=(512,) - ), # must declare input shape + tf.keras.layers.Dense(H[0], activation=tf.tanh, input_shape=(512,)), # must declare input shape tf.keras.layers.Dense(H[1], activation=tf.tanh), tf.keras.layers.Dense( H[2], @@ -64,9 +58,7 @@ def runexample(H, model, str): x, _, _ = normalize(x, 1) # normalize each input row y, _ymu, ys = normalize(y, 0) # normalize each output column - x, y, xv, yv, xt, yt = splitdata( - x, y, train=0.70, validate=0.15, test=0.15, shuffle=False - ) + x, y, xv, yv, xt, yt = splitdata(x, y, train=0.70, validate=0.15, test=0.15, shuffle=False) labels = ["train", "validate", "test"] print(model) @@ -91,9 +83,7 @@ def criteria(y_pred, y): # MSE with tf.GradientTape() as tape: y_pred = model(x) loss = criteria(y_pred, y) - grads = tape.gradient( - loss, model.variables - ) # DO NOT INDENT, not inside tf.GradientTape context manager + grads = tape.gradient(loss, model.variables) # DO NOT INDENT, not inside tf.GradientTape context manager y_predv = model(xv) # Compute and print loss @@ -105,9 +95,7 @@ def criteria(y_pred, y): # MSE if L[i, 1] < best[1]: best = (i, L[i, 1], None) if (i - best[0]) > validations: - print( - f"\n{validations:g} validation checks exceeded at epoch {i:g}." - ) + print(f"\n{validations:g} validation checks exceeded at epoch {i:g}.") break if i % printInterval == 0: # print and save progress @@ -122,17 +110,12 @@ def criteria(y_pred, y): # MSE global_step=tf.train.get_or_create_global_step(), ) else: - print( - "WARNING: Validation loss still decreasing after %g epochs (train longer)." - % (i + 1) - ) + print("WARNING: Validation loss still decreasing after %g epochs (train longer)." % (i + 1)) # torch.save(best[2], path + 'models/' + name + '.pt') # model.load_state_dict(best[2]) dt = time.time() - tica - print( - f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:" - ) + print(f"\nFinished {i + 1:g} epochs in {dt:.3f}s ({i / dt:.3f} epochs/s)\nBest results from epoch {best[0]:g}:") loss, std = np.zeros(3), np.zeros((3, ny)) for i, (xi, yi) in enumerate(((x, y), (xv, yv), (xt, yt))): loss[i], std[i] = stdtf(model(xi) - yi, ys) @@ -142,9 +125,7 @@ def criteria(y_pred, y): # MSE data = [] for i, s in enumerate(labels): - data.append( - go.Scatter(x=np.arange(epochs), y=L[:, i], mode="markers+lines", name=s) - ) + data.append(go.Scatter(x=np.arange(epochs), y=L[:, i], mode="markers+lines", name=s)) layout = go.Layout( xaxis=dict(type="linear", autorange=True), yaxis=dict(type="log", autorange=True), diff --git a/utils/utils.py b/utils/utils.py index 3a3784b..4231d57 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -8,9 +8,7 @@ # Set printoptions torch.set_printoptions(linewidth=1320, precision=5, profile="long") -np.set_printoptions( - linewidth=320, formatter={"float_kind": "{:11.5g}".format} -) # format short g, %precision=5 +np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format}) # format short g, %precision=5 def normalize(x, axis=None): # normalize x mean and std by axis @@ -32,9 +30,7 @@ def shuffledata(x, y): # randomly shuffle x and y by same axis=0 indices return x[i], y[i] -def splitdata( - x, y, train=0.7, validate=0.15, test=0.15, shuffle=False -): # split training data +def splitdata(x, y, train=0.7, validate=0.15, test=0.15, shuffle=False): # split training data """Splits data arrays x and y into training, validation, and test sets with optional shuffling.""" n = x.shape[0] if shuffle: @@ -73,13 +69,8 @@ def model_info(model): shape, mean, and std. """ n_p = sum(x.numel() for x in model.parameters()) # number parameters - n_g = sum( - x.numel() for x in model.parameters() if x.requires_grad - ) # number gradients - print( - "\n%5s %40s %9s %12s %20s %10s %10s" - % ("layer", "name", "gradient", "parameters", "shape", "mu", "sigma") - ) + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + print("\n%5s %40s %9s %12s %20s %10s %10s" % ("layer", "name", "gradient", "parameters", "shape", "mu", "sigma")) for i, (name, p) in enumerate(model.named_parameters()): name = name.replace("module_list.", "") print( @@ -119,9 +110,7 @@ def step(self, loss, metrics=None, model=None): self.num_bad_epochs += 1 self.epoch += 1 self.first(model) if self.epoch == 0 else None - self.printepoch( - self.epoch, loss, metrics - ) if self.epoch % self.printerval == 0 else None + self.printepoch(self.epoch, loss, metrics) if self.epoch % self.printerval == 0 else None if loss < self.bestloss: self.bestloss = loss @@ -130,9 +119,7 @@ def step(self, loss, metrics=None, model=None): self.num_bad_epochs = 0 if model: if self.bestmodel: - self.bestmodel.load_state_dict( - model.state_dict() - ) # faster than deepcopy + self.bestmodel.load_state_dict(model.state_dict()) # faster than deepcopy else: self.bestmodel = copy.deepcopy(model) @@ -140,9 +127,7 @@ def step(self, loss, metrics=None, model=None): self.final(f"{self.patience:g} Patience exceeded at epoch {self.epoch:g}.") return True elif self.epoch >= self.epochs: - self.final( - f"WARNING: {self.patience:g} Patience not exceeded by epoch {self.epoch:g} (train longer)." - ) + self.final(f"WARNING: {self.patience:g} Patience not exceeded by epoch {self.epoch:g} (train longer).") return True else: return False