TransformerProject/make_graphs.py at main · Edwardd02/TransformerProject · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import math
from numpy import load


# Statistics functions
def RMSE(predicted, actual):
    """Return the root-mean-square error between *predicted* and *actual*.

    Both arguments must support element-wise subtraction and ``len()``
    (the rest of this file passes 1-D torch tensors). The mean is taken
    over ``len(predicted)``.
    """
    squared_errors = (predicted - actual) ** 2
    mean_squared_error = sum(squared_errors) / len(predicted)
    return math.sqrt(mean_squared_error)


def MAE(predicted, actual):
    """Return the mean absolute error between *predicted* and *actual*.

    Both arguments must support element-wise subtraction and ``len()``.
    The mean is taken over ``len(predicted)``.
    """
    absolute_errors = abs(predicted - actual)
    return sum(absolute_errors) / len(predicted)


def MAPE(predicted, actual):
    """Return the mean absolute percentage error as a fraction (not x100).

    Each error is taken relative to the corresponding *actual* value, so a
    zero in *actual* produces a division by zero for that element.
    The mean is taken over ``len(predicted)``.
    """
    relative_errors = (actual - predicted) / actual
    return sum(abs(relative_errors)) / len(predicted)


def plot(pred, actual, start, end, title="Prediction vs Actual", y="VWC (mm)", x="Time", extras=None):
    """Plot predicted vs. actual values over the slice ``[start:end]``.

    Args:
        pred: Predicted data; sliced with ``pred[start:end]``. For the best
            results it should contain a single column.
        actual: Original data; sliced with ``actual[start:end]``. For the
            best results it should contain a single column.
        start: Start of the interval to plot.
        end: End of the interval to plot.
        title: Title shown above the plot.
        y: Label for the y axis.
        x: Label for the x axis.
        extras: Optional iterable of additional series to draw alongside
            ``pred`` and ``actual``. Each item is sliced with
            ``[start:end]`` and must expose a ``.name`` attribute, which is
            used as its legend label.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if extras is None:
        extras = []

    fig, ax = plt.subplots(figsize=(20, 5))
    ax.set_title(title)
    ax.set_ylabel(y)
    ax.set_xlabel(x)
    ax.plot(pred[start:end], label="Predicted")
    ax.plot(actual[start:end], label="Actual")
    for extra in extras:
        ax.plot(extra[start:end], label=extra.name)
    ax.legend()
    plt.show()


def statisics(pred, actual):
    """Print RMSE, MAE and MAPE for *pred* against *actual*.

    This works best if you pass in the specific interval you want
    statistics on, for example ``df[column][0:100]``.

    Args:
        pred: Predicted values. Either a ``torch.Tensor`` or an object with
            a ``.values`` attribute that ``torch.tensor`` can convert
            (e.g. a pandas Series).
        actual: Ground-truth values, same accepted types as ``pred``.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # torch.Tensor subclasses, which do not have a usable ``.values``
    # attribute for conversion.
    p = pred if isinstance(pred, torch.Tensor) else torch.tensor(pred.values)
    a = actual if isinstance(actual, torch.Tensor) else torch.tensor(actual.values)

    # Compute every metric before printing so a failure prints nothing partial.
    metrics = {
        "RMSE": RMSE(p, a),
        "MAE": MAE(p, a),
        "MAPE": MAPE(p, a),
    }
    for label, value in metrics.items():
        # Metrics may come back as 0-d tensors; unwrap them to plain numbers.
        if isinstance(value, torch.Tensor):
            value = value.item()
        print("%s: %s" % (label, value))


def denormalize(df, std, mean):
    """Undo normalization: scale *df* by ``std`` (plus machine epsilon) and add ``mean``.

    The epsilon term presumably mirrors the one added to ``std`` when the
    data was normalized -- confirm against the normalization code.
    """
    scale = std + np.finfo(float).eps
    return df * scale + mean


experiment_name = "SMPModelTest_2025-02-19_19-41-24_MbF"
path = f"experiments/{experiment_name}/predictions/best_predictions.npz"
pathnorm = f"experiments/{experiment_name}/normalization.pickle"
# actual is the data without the gap
# gaps is the data with the gap
# NOTE(security): read_pickle and load(..., allow_pickle=True) below can
# execute arbitrary code stored in the file; only load trusted artifacts.
actual = pd.read_pickle("data/OriginalSMPData.pkl")
gaps = pd.read_pickle("data/SMPSKLearnGap.pkl")
start = 1440
end = 4320
testSet = gaps[start:end]
title = "SkLearn Filled Data - Epoch 300, mean mask length 50, batch size 128, Window Length 350"
# data is the best predictions
data = load(path, allow_pickle=True)

# norm contains all the stds and means for denormalization purposes
norm = load(pathnorm, allow_pickle=True)

# In best predictions there are several sections; 'predictions' is the one we want
pred = data['predictions']
print(pred.shape)
# The predictions are of shape (num batches, batch size, window size, row size).
# Flatten the three leading levels so that each innermost row becomes one df row.
dfList = [row for block in pred for batch in block for row in batch]

# This formats the df
df = pd.DataFrame(dfList)
df.columns = actual.columns

# The predictions do not carry dates (the indices need to be related to the
# windows, and dates could not be processed).
# This maps the predictions to their timestamps.
dates = testSet.index
matching_rows = actual.loc[actual.index.isin(dates)]

# Denormalizing each column
# Doing it all at once fails
for column in df.columns:
    df[column] = denormalize(df[column], norm['std'][column], norm['mean'][column])

# Sometimes the prediction is larger than the gap due to the transformer filling
# in extra space if there are not enough batches to fill the entire batch size.
# This fixes the issue by trimming off the excess.
# NOTE(review): if df ends up with fewer rows than matching_rows, the index
# assignment below will fail with a length mismatch.
df = df[:len(matching_rows)]
df.index = matching_rows.index

# Column name -> [first row, last row) of the interval to report on.
intervals = {"P3_VWC": [0, 2160 - start]}
# Statistics over each column's interval.
for col, (lo, hi) in intervals.items():
    statisics(df[col][lo:hi], matching_rows[col][lo:hi])
# Full-range plot for each column; the title names the column actually plotted.
for col in intervals:
    plot(df[col], matching_rows[col], 0, len(df[col]), title="%s %s" % (title, col))
# Zoomed plot restricted to each column's interval.
for col, (lo, hi) in intervals.items():
    plot(df[col], matching_rows[col], lo, hi, title="%s %s" % (title, col))