# ReBaBot/plot_result.py (source: GitHub repository Enange/ReBaBot, branch main)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.ndimage import uniform_filter1d

def load_runs(algo_dir: str, argument: str) -> np.ndarray:
    """Load one column from every ``run_*.csv`` file in a directory.

    Runs may have different lengths; each one is truncated to the length of
    the shortest run so the result is a rectangular array.

    Args:
        algo_dir: Directory that contains the ``run_*.csv`` files.
        argument: Name of the CSV column to extract from each file.

    Returns:
        Array of shape ``(n_runs, n_episodes)`` where ``n_episodes`` is the
        row count of the shortest file. Rows follow the sorted file paths.

    Raises:
        ValueError: If no ``run_*.csv`` file is found in ``algo_dir``.
        KeyError: If a file has no column named ``argument``.
    """
    files = sorted(Path(algo_dir).glob("run_*.csv"))
    if not files:
        # Without this guard the min() below fails with an opaque
        # "min() arg is an empty sequence" that hides the real cause.
        raise ValueError(f"No run_*.csv files found in {str(algo_dir)!r}")

    all_rewards = []
    for f in files:
        df = pd.read_csv(f)
        if argument not in df.columns:
            # Name the offending file; the bare pandas KeyError does not.
            raise KeyError(f"Column {argument!r} not found in {f}")
        all_rewards.append(df[argument].values)

    # Truncate to the shortest run so all rows have the same length.
    min_len = min(len(r) for r in all_rewards)
    return np.array([r[:min_len] for r in all_rewards])

def plot_with_variance(results: dict, smooth_window: int = 20, y_argument: str = "Reward",
                       output_path: str = "comparison.png"):
    """Plot the smoothed mean curve of each algorithm with a +/- 1 std band.

    Each run is smoothed along the episode axis with a moving average of
    width ``smooth_window``; the mean and standard deviation are then taken
    across runs. The figure is saved to ``output_path`` and shown.

    Args:
        results: Mapping from algorithm label to an array of shape
            ``(n_runs, n_episodes)``. A 1-D array is treated as a single run.
        smooth_window: Width of the moving-average window, in episodes.
        y_argument: Label for the y axis.
        output_path: File the figure is written to.
    """
    colors = {"PPO": "deepskyblue", "SAC": "limegreen"}
    fig, ax = plt.subplots(figsize=(10, 6))

    for algo, runs in results.items():
        # uniform_filter1d returns the input dtype by default, so integer
        # metrics would be truncated to integers; smooth in float instead.
        # atleast_2d lets a single 1-D run be plotted (with a zero-width band).
        runs = np.atleast_2d(np.asarray(runs, dtype=float))

        # Smoothing + stats
        runs_s = uniform_filter1d(runs, size=smooth_window, axis=-1)
        mean = runs_s.mean(axis=0)
        std = runs_s.std(axis=0)
        episodes = np.arange(mean.shape[0])

        # Labels not listed in `colors` fall back to matplotlib's color cycle.
        color = colors.get(algo, None)
        ax.plot(episodes, mean, label=algo, color=color, linewidth=2)
        ax.fill_between(episodes, mean - std, mean + std,
                        alpha=0.3, color=color)

    ax.set_xlabel("Episode")
    ax.set_ylabel(y_argument)
    ax.set_title("ReBaBot Training Results")
    ax.legend()
    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.show()

# --- Main ---
# Column to compare across algorithms; also used as the y-axis label.
argument = "smoothness_penalty"

# (plot label, directory holding that algorithm's run_*.csv files)
_RUN_DIRS = (
    ("PPO", "datasets/results/ppo"),
    ("SAC", "datasets/results/sac"),
)

results = {
    label: load_runs(directory, argument=argument)
    for label, directory in _RUN_DIRS
}
plot_with_variance(results, smooth_window=100, y_argument=argument)