From 4b92379efe172bfbee975ee062618f8710f61c97 Mon Sep 17 00:00:00 2001
From: jonas-becker <snakeeye98@web.de>
Date: Wed, 25 Jun 2025 13:13:50 +0100
Subject: [PATCH 1/5] nicer labels

---
 mallm/evaluation/evaluator.py      |  2 ++
 mallm/evaluation/plotting/plots.py | 54 +++++++++++++++++++++++++++---
 mallm/utils/dicts.py               |  2 +-
 3 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/mallm/evaluation/evaluator.py b/mallm/evaluation/evaluator.py
index f98c9c10..c9189ed0 100644
--- a/mallm/evaluation/evaluator.py
+++ b/mallm/evaluation/evaluator.py
@@ -418,10 +418,12 @@ def run_evaluator(
     extensive: bool = False,
 ) -> None:
     if Path(input_json_file_path).is_dir():
+        print(f"Processing directory: {input_json_file_path}")
         batch_process_dir_path(
             input_json_file_path, output_dir_path, metrics, extensive
         )
     else:
+        print(f"Processing file: {input_json_file_path}")
         evaluator = Evaluator(input_json_file_path, output_dir_path, metrics, extensive)
         evaluator.process()
 
diff --git a/mallm/evaluation/plotting/plots.py b/mallm/evaluation/plotting/plots.py
index d793031d..3fc080eb 100644
--- a/mallm/evaluation/plotting/plots.py
+++ b/mallm/evaluation/plotting/plots.py
@@ -13,6 +13,11 @@ def process_eval_file(file_path: str) -> pd.DataFrame:
     return pd.DataFrame(data)
 
 
+def format_metric(text: str) -> str:
+    """Return a display label for a metric/option key: underscores become spaces, the text is capitalized, then known metric names are rewritten to their display spelling."""
+    return text.replace("_", " ").capitalize().replace("Correct", "Accuracy").replace("Rougel", "ROUGE-L").replace("Rouge1", "ROUGE-1").replace("Rouge2", "ROUGE-2").replace("Bleu", "BLEU").replace("Distinct1", "Distinct-1").replace("Distinct2", "Distinct-2")
+
+
 def process_stats_file(file_path: str) -> pd.DataFrame:
     data = json.loads(Path(file_path).read_text())
     # Extract only the average scores
@@ -79,7 +84,7 @@ def plot_turns_with_std(df: pd.DataFrame, input_path: str) -> None:
     plt.title("Mean Turns with Standard Deviation by Experiment Condition")
     plt.xticks(
         index,
-        [f"{row['option']}_{row['dataset']}" for _, row in grouped.iterrows()],
+        get_unique_labels(grouped),
         rotation=45,
         ha="right",
     )
@@ -114,7 +119,7 @@ def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str) -> None:
     plt.title("Mean Clock Seconds with Standard Deviation by Experiment Condition")
     plt.xticks(
         index,
-        [f"{row['option']}_{row['dataset']}" for _, row in grouped.iterrows()],
+        get_unique_labels(grouped),
         rotation=45,
         ha="right",
     )
@@ -158,7 +163,7 @@ def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
     )
     plt.xticks(
         index,
-        [f"{row['option']}_{row['dataset']}" for _, row in grouped.iterrows()],
+        get_unique_labels(grouped),
         rotation=45,
         ha="right",
     )
@@ -169,6 +174,45 @@ def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
     plt.close()
 
 
+def get_unique_labels(df: pd.DataFrame) -> list[str]:
+    """Return short tick labels for the (option, dataset) rows of ``df``.
+
+    Every row is labelled ``"<option>_<dataset>"``. The prefix and suffix
+    shared by all labels are stripped and the rest goes through
+    ``format_metric``. A label that would be stripped to nothing (single
+    condition, identical labels, overlapping prefix/suffix) keeps its full
+    text so that no tick ends up blank.
+    """
+    labels = [f"{row['option']}_{row['dataset']}" for _, row in df.iterrows()]
+    if not labels:
+        return []
+
+    def shared_prefix_len(items: list[str]) -> int:
+        # min and max bound all other strings lexicographically, so the
+        # prefix those two share is shared by every item.
+        length = 0
+        for first_char, last_char in zip(min(items), max(items)):
+            if first_char != last_char:
+                break
+            length += 1
+        return length
+
+    prefix_len = shared_prefix_len(labels)
+    suffix_len = shared_prefix_len([label[::-1] for label in labels])
+
+    unique_labels = []
+    for label in labels:
+        unique_part = label[prefix_len:]
+        # On short labels the suffix may overlap the stripped prefix; only
+        # remove it when it is still fully present.
+        if suffix_len and unique_part.endswith(label[-suffix_len:]):
+            unique_part = unique_part[:-suffix_len]
+        # Fall back to the whole label rather than emit an empty tick.
+        unique_labels.append(format_metric(unique_part or label))
+
+    return unique_labels
+
+
 def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str) -> None:
     print("Shape of stats_df:", df.shape)
     print("Columns in stats_df:", df.columns)
@@ -224,10 +268,10 @@ def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str) -> None
 
         plt.xlabel("Experiment Condition")
         plt.ylabel("Average Score")
-        plt.title(f"Mean {score_type} Score with Standard Deviation")
+        plt.title(f"Mean {format_metric(score_type)} Score with Standard Deviation")
         plt.xticks(
             x,
-            [f"{row['option']}_{row['dataset']}" for _, row in score_data.iterrows()],
+            get_unique_labels(score_data),
             rotation=45,
             ha="right",
         )
diff --git a/mallm/utils/dicts.py b/mallm/utils/dicts.py
index 3123d029..acc738d3 100644
--- a/mallm/utils/dicts.py
+++ b/mallm/utils/dicts.py
@@ -7,10 +7,10 @@
 )
 from mallm.decision_protocols.consensus_voting import ConsensusVoting
 from mallm.decision_protocols.cumulative_voting import CumulativeVoting
+from mallm.decision_protocols.judge import Judge
 from mallm.decision_protocols.protocol import DecisionProtocol
 from mallm.decision_protocols.ranked_voting import RankedVoting
 from mallm.decision_protocols.simple_voting import SimpleVoting
-from mallm.decision_protocols.judge import Judge
 from mallm.discussion_paradigms.collective_refinement import CollectiveRefinement
 from mallm.discussion_paradigms.debate import DiscussionDebate
 from mallm.discussion_paradigms.memory import DiscussionMemory

From cf856f86c69c231297cbaaac4bbc3e385f7e97c0 Mon Sep 17 00:00:00 2001
From: larskaesberg <larskaesberg@gmail.com>
Date: Thu, 3 Jul 2025 01:45:53 +0200
Subject: [PATCH 2/5] feat: better plotting for mallm

---
 mallm/evaluation/plotting/plots.py | 409 +++++++++++++++++++++--------
 1 file changed, 297 insertions(+), 112 deletions(-)

diff --git a/mallm/evaluation/plotting/plots.py b/mallm/evaluation/plotting/plots.py
index 3fc080eb..ec4cda5f 100644
--- a/mallm/evaluation/plotting/plots.py
+++ b/mallm/evaluation/plotting/plots.py
@@ -5,8 +5,24 @@
 
 import matplotlib.pyplot as plt
 import pandas as pd
+import seaborn as sns
+import numpy as np
 from tqdm import tqdm
 
+# Set the style for beautiful plots
+plt.style.use('seaborn-v0_8-whitegrid')
+sns.set_palette("husl")
+
+# Define a beautiful color palette
+COLORS = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8']
+
+def get_colors(n_colors):
+    """Return n_colors colors: the first n_colors entries of COLORS when it has enough, otherwise the result of sampling the Set3 colormap at n_colors evenly spaced points in [0, 1]."""
+    if n_colors <= len(COLORS):
+        return COLORS[:n_colors]
+    else:
+        # Use a colormap for more colors
+        return plt.cm.Set3(np.linspace(0, 1, n_colors))
 
 def process_eval_file(file_path: str) -> pd.DataFrame:
     data = json.loads(Path(file_path).read_text())
@@ -62,115 +78,199 @@ def aggregate_data(
 
 
 def plot_turns_with_std(df: pd.DataFrame, input_path: str) -> None:
-    grouped = (
-        df.groupby(["option", "dataset"])["turns"].agg(["mean", "std"]).reset_index()
-    )
-
-    plt.figure(figsize=(12, 6))
-    bar_width = 0.35
-    index = range(len(grouped))
-
-    plt.bar(
-        index,
-        grouped["mean"],
-        bar_width,
-        yerr=grouped["std"],
-        capsize=5,
-        label="Mean Turns with Std Dev",
-    )
-
-    plt.xlabel("Experiment Condition")
-    plt.ylabel("Number of Turns")
-    plt.title("Mean Turns with Standard Deviation by Experiment Condition")
-    plt.xticks(
-        index,
-        get_unique_labels(grouped),
-        rotation=45,
-        ha="right",
-    )
-    plt.legend()
+    """Create a beautiful violin plot for turns distribution"""
+    # Filter out rows with missing or invalid turns data
+    df = df.dropna(subset=['turns'])
+    df = df[df['turns'].notna() & (df['turns'] >= 0)]
+    
+    if df.empty:
+        print("Warning: No valid turns data found. Skipping turns plot.")
+        return
+    
+    # Create combination labels for better grouping
+    df['condition'] = df['option'] + '_' + df['dataset']
+    unique_labels = get_unique_labels_from_conditions(df['condition'].unique())
+    
+    # Create a mapping from full condition to unique label
+    condition_to_label = dict(zip(df['condition'].unique(), unique_labels))
+    df['condition_label'] = df['condition'].map(condition_to_label)
+    
+    plt.figure(figsize=(10, 4))
+    
+    # Create violin plot with individual points
+    ax = sns.violinplot(data=df, x='condition_label', y='turns', 
+                       hue='condition_label', palette=get_colors(len(df['condition_label'].unique())), 
+                       inner=None, alpha=0.7, legend=False)
+    
+    # Add individual points with jitter
+    sns.stripplot(data=df, x='condition_label', y='turns', 
+                  color='white', size=6, alpha=0.8, edgecolor='black', linewidth=0.5)
+    
+    # Add red diamond mean markers that align correctly with violin plots
+    unique_conditions = df['condition_label'].unique()
+    
+    for i, condition in enumerate(unique_conditions):
+        mean_val = df[df['condition_label'] == condition]['turns'].mean()
+        # Use red diamond markers positioned correctly
+        ax.plot(i, mean_val, marker='D', color='red', markersize=8, 
+                markeredgecolor='white', markeredgewidth=1, zorder=10)
+    
+    # Styling
+    ax.set_xlabel('')  # Remove automatic seaborn x-axis label
+    ax.set_ylabel('Number of Turns', fontsize=14, fontweight='bold')
+    
+    # Rotate labels and improve spacing
+    plt.xticks(rotation=45, ha='right', fontsize=14)
+    plt.yticks(fontsize=14)
+    plt.grid(True, alpha=0.3)
+    # Add a subtle background
+    ax.set_facecolor('#fafafa')
+    
     plt.tight_layout()
-    plt.savefig(f"{input_path}/turns_with_std_dev.png")
+    plt.subplots_adjust(bottom=0.12)  # Reduced space for rotated labels
+    plt.savefig(f"{input_path}/turns_distribution.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
+    plt.savefig(f"{input_path}/turns_distribution.pdf", bbox_inches='tight', pad_inches=0.1)
     plt.close()
 
 
 def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str) -> None:
+    """Create a beautiful horizontal lollipop chart for clock seconds"""
     grouped = (
         df.groupby(["option", "dataset"])["clockSeconds"]
         .agg(["mean", "std"])
         .reset_index()
     )
-
-    plt.figure(figsize=(12, 6))
-    bar_width = 0.35
-    index = range(len(grouped))
-
-    plt.bar(
-        index,
-        grouped["mean"],
-        bar_width,
-        yerr=grouped["std"],
-        capsize=5,
-        label="Mean Clock Seconds with Std Dev",
-    )
-
-    plt.xlabel("Experiment Condition")
-    plt.ylabel("Clock Seconds")
-    plt.title("Mean Clock Seconds with Standard Deviation by Experiment Condition")
-    plt.xticks(
-        index,
-        get_unique_labels(grouped),
-        rotation=45,
-        ha="right",
-    )
-    plt.legend()
+    
+    unique_labels = get_unique_labels(grouped)
+    grouped['label'] = unique_labels
+    
+    # Sort data: baselines first, then others by shortest time
+    def sort_key(row):
+        option = row['option'].lower()
+        if option.startswith('baseline'):
+            return (0, row['mean'])  # Baselines first, sorted by time
+        else:
+            return (1, row['mean'])  # Others after, sorted by time (shortest first)
+    
+    grouped['sort_key'] = grouped.apply(sort_key, axis=1)
+    grouped = grouped.sort_values('sort_key').drop('sort_key', axis=1).reset_index(drop=True)
+    # Reverse the entire order
+    grouped = grouped.iloc[::-1].reset_index(drop=True)
+    
+    fig, ax = plt.subplots(figsize=(10, 4))
+    
+    # Create discrete marker chart (no stems)
+    y_pos = np.arange(len(grouped))
+    colors = get_colors(len(grouped))
+    
+    # Draw discrete circular markers only
+    scatter = ax.scatter(grouped['mean'], y_pos, 
+                        s=250, c=colors, 
+                        alpha=0.9, edgecolors='white', linewidth=3, zorder=10)
+    
+    # Add subtle error bars
+    ax.errorbar(grouped['mean'], y_pos, xerr=grouped['std'], 
+                fmt='none', color='gray', alpha=0.5, capsize=6, linewidth=2)
+    
+    # Add value labels with better positioning to avoid circle overlap
+    for i, (_, row) in enumerate(grouped.iterrows()):
+        # Calculate offset to avoid overlap with circle (larger offset)
+        offset = max(row['std'] + max(grouped['mean']) * 0.08, max(grouped['mean']) * 0.05)
+        ax.text(row['mean'] + offset, i, 
+                f'{row["mean"]:.1f}s', 
+                va='center', ha='left', fontweight='bold', fontsize=14,
+                bbox=dict(boxstyle='round,pad=0.15', facecolor='white', alpha=0.8, edgecolor='none'))
+    
+    # Styling
+    ax.set_yticks(y_pos)
+    ax.set_yticklabels(grouped['label'], fontsize=14)
+    ax.set_xlabel('Execution Time (seconds)', fontsize=14, fontweight='bold')
+    
+    # Set x-axis limits with proper margins for labels
+    max_val = max(grouped['mean'] + grouped['std'])
+    ax.set_xlim(0, max_val * 1.3)  # Extra space for non-overlapping labels
+    
+    # Remove top and right spines for cleaner look
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    ax.spines['left'].set_color('#cccccc')
+    ax.spines['bottom'].set_color('#cccccc')
+    ax.tick_params(axis='x', labelsize=14)
+    ax.grid(True, alpha=0.3, axis='x', linestyle='-', linewidth=0.5)
+    ax.set_facecolor('#fafafa')
+    
     plt.tight_layout()
-    plt.savefig(f"{input_path}/clock_seconds_with_std_dev.png")
+    plt.savefig(f"{input_path}/clock_seconds.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
+    plt.savefig(f"{input_path}/clock_seconds.pdf", bbox_inches='tight', pad_inches=0.1)
     plt.close()
 
 
 def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
+    """Create a beautiful horizontal bar chart for decision success rates"""
     if "decisionSuccess" not in df.columns:
         print(
             "Warning: 'decisionSuccess' column not found. Skipping decision success plot."
         )
         return
 
+    # Filter out rows with missing or invalid decision success data
+    df = df.dropna(subset=['decisionSuccess'])
+    df = df[df['decisionSuccess'].notna()]
+    
+    if df.empty:
+        print("Warning: No valid decision success data found. Skipping decision success plot.")
+        return
+
     df["decision_success_numeric"] = df["decisionSuccess"].map({True: 1, False: 0})
     grouped = (
         df.groupby(["option", "dataset"])["decision_success_numeric"]
         .agg(["mean", "std"])
         .reset_index()
     )
-
-    plt.figure(figsize=(12, 6))
-    bar_width = 0.35
-    index = range(len(grouped))
-
-    plt.bar(
-        index,
-        grouped["mean"],
-        bar_width,
-        yerr=grouped["std"],
-        capsize=5,
-        label="Mean Decision Success Rate with Std Dev",
-    )
-
-    plt.xlabel("Experiment Condition")
-    plt.ylabel("Decision Success Rate")
-    plt.title(
-        "Mean Decision Success Rate with Standard Deviation by Experiment Condition"
-    )
-    plt.xticks(
-        index,
-        get_unique_labels(grouped),
-        rotation=45,
-        ha="right",
-    )
-    plt.legend()
-    plt.ylim(0, 1)  # Set y-axis limits for percentage
+    
+    unique_labels = get_unique_labels(grouped)
+    grouped['label'] = unique_labels
+    grouped = grouped.sort_values('mean')
+    
+    fig, ax = plt.subplots(figsize=(10, 3))
+    
+    # Create gradient colors based on success rate
+    colors = plt.cm.RdYlGn(grouped['mean'])
+    
+    # Create horizontal bars
+    bars = ax.barh(range(len(grouped)), grouped['mean'], 
+                   color=colors, alpha=0.8, height=0.6)
+    
+    # Add percentage labels on bars
+    for i, (_, row) in enumerate(grouped.iterrows()):
+        percentage = row['mean'] * 100
+        ax.text(row['mean'] + 0.02, i, f'{percentage:.1f}%', 
+                va='center', ha='left', fontweight='bold', fontsize=14)
+    
+    # Add a subtle pattern to bars
+    for bar, rate in zip(bars, grouped['mean']):
+        if rate < 0.5:  # Add pattern for low success rates
+            bar.set_hatch('///')
+    
+    # Styling
+    ax.set_yticks(range(len(grouped)))
+    ax.set_yticklabels(grouped['label'], fontsize=14)
+    ax.set_xlabel('Decision Success Rate', fontsize=14, fontweight='bold')
+    ax.set_xlim(0, 1.1)
+    
+    # Add percentage ticks
+    ax.set_xticks([0, 0.25, 0.5, 0.75, 1.0])
+    ax.set_xticklabels(['0%', '25%', '50%', '75%', '100%'], fontsize=14)
+    
+    # Remove spines and add grid
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    ax.grid(True, alpha=0.3, axis='x')
+    ax.set_facecolor('#fafafa')
+    
     plt.tight_layout()
-    plt.savefig(f"{input_path}/decision_success_with_std_dev.png")
+    plt.savefig(f"{input_path}/decision_success.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
+    plt.savefig(f"{input_path}/decision_success.pdf", bbox_inches='tight', pad_inches=0.1)
     plt.close()
 
 
@@ -213,7 +313,50 @@ def get_unique_labels(df: pd.DataFrame) -> list[str]:
     return unique_labels
 
 
+def get_unique_labels_from_conditions(conditions) -> list[str]:
+    """Return short display labels for a collection of condition strings.
+
+    The prefix and suffix shared by all conditions are stripped and the
+    remainder is passed through ``format_metric``. A condition that would be
+    stripped to nothing (single condition, identical conditions, overlapping
+    prefix/suffix) keeps its full text so that no label ends up blank.
+
+    ``conditions`` may be a sequence of strings or any object exposing
+    ``tolist()`` such as a NumPy array (it is converted to a list first).
+    """
+    if hasattr(conditions, 'tolist'):
+        conditions = conditions.tolist()
+
+    if len(conditions) == 0:
+        return []
+
+    def shared_prefix_len(items: list[str]) -> int:
+        # min and max bound all other strings lexicographically, so the
+        # prefix those two share is shared by every item.
+        length = 0
+        for first_char, last_char in zip(min(items), max(items)):
+            if first_char != last_char:
+                break
+            length += 1
+        return length
+
+    prefix_len = shared_prefix_len(conditions)
+    suffix_len = shared_prefix_len([condition[::-1] for condition in conditions])
+
+    unique_labels = []
+    for condition in conditions:
+        unique_part = condition[prefix_len:]
+        # The suffix may overlap the stripped prefix; strip only if still present.
+        if suffix_len and unique_part.endswith(condition[-suffix_len:]):
+            unique_part = unique_part[:-suffix_len]
+        # Fall back to the whole condition rather than emit an empty label.
+        unique_labels.append(format_metric(unique_part or condition))
+
+    return unique_labels
+
+
 def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str) -> None:
+    """Create beautiful enhanced bar charts for score distributions"""
     print("Shape of stats_df:", df.shape)
     print("Columns in stats_df:", df.columns)
     print("First few rows of stats_df:")
@@ -243,47 +386,89 @@ def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str) -> None
         .reset_index()
     )
 
-    # Safe grouped data to a CSV file
-
     # Create a separate plot for each Score Type
     for score_type in grouped["Score Type"].unique():
-        plt.figure(figsize=(10, 6))
+        fig, ax = plt.subplots(figsize=(10, 4))
 
         # Filter data for the current score type
-        score_data = grouped[grouped["Score Type"] == score_type]
+        score_data = grouped[grouped["Score Type"] == score_type].copy()
+        
+        # Sort data: baselines first, then alphabetically
+        def sort_key(row):
+            option = row['option'].lower()
+            if option.startswith('baseline'):
+                return (0, option)  # Baselines first
+            else:
+                return (1, option)  # Others after
+        
+        score_data['sort_key'] = score_data.apply(sort_key, axis=1)
+        score_data = score_data.sort_values('sort_key').drop('sort_key', axis=1).reset_index(drop=True)
+        
         score_data.to_csv(
             f'{input_path}/{score_type.replace(" ", "_").lower()}_score.csv',
             index=False,
         )
 
-        # Create bar plot
-        x = range(len(score_data))
-        plt.bar(
-            x,
-            score_data["mean"],
-            yerr=score_data["std"],
-            capsize=5,
-            color=plt.cm.Set3(range(len(score_data))),
-        )  # Use a color cycle
-
-        plt.xlabel("Experiment Condition")
-        plt.ylabel("Average Score")
-        plt.title(f"Mean {format_metric(score_type)} Score with Standard Deviation")
-        plt.xticks(
-            x,
-            get_unique_labels(score_data),
-            rotation=45,
-            ha="right",
-        )
-
-        # Add value labels on top of each bar
-        for i, v in enumerate(score_data["mean"]):
-            plt.text(
-                i, v + score_data["std"].iloc[i], f"{v:.2f}", ha="center", va="bottom"
-            )
-
+        # Create beautiful bar plot with gradient colors
+        x = np.arange(len(score_data))
+        colors = plt.cm.viridis(np.linspace(0, 1, len(score_data)))
+        
+        bars = ax.bar(x, score_data["mean"], 
+                     yerr=score_data["std"],
+                     capsize=8,
+                     color=colors, alpha=0.8,
+                     edgecolor='white', linewidth=2,
+                     width=0.6)  # Slightly narrower bars for more discrete look
+
+        # Calculate proper y-axis limits
+        max_height = max(score_data["mean"] + score_data["std"])
+        min_val = min(0, min(score_data["mean"] - score_data["std"]))
+        y_range = max_height - min_val
+        
+        # Add value labels on top of each bar with better positioning
+        for i, (bar, mean_val, std_val) in enumerate(zip(bars, score_data["mean"], score_data["std"])):
+            height = mean_val + std_val
+            ax.text(bar.get_x() + bar.get_width()/2., height + y_range * 0.05,
+                   f'{mean_val:.3f}', ha='center', va='bottom', 
+                   fontweight='bold', fontsize=14,
+                   bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))
+            
+            # Add gradient effect to bars
+            gradient = np.linspace(0, 1, 256).reshape(256, -1)
+            ax.imshow(gradient, extent=[bar.get_x(), bar.get_x() + bar.get_width(), 
+                                      0, bar.get_height()], 
+                     aspect='auto', alpha=0.3, cmap='viridis')
+
+        # Styling
+        ax.set_ylabel('Average Score', fontsize=14, fontweight='bold')
+        
+        # Set x-axis with proper spacing and labels
+        ax.set_xticks(x)
+        ax.set_xticklabels(get_unique_labels(score_data), rotation=45, ha='right', fontsize=14)
+        
+        # Set proper axis limits to prevent cut-off and add margins
+        ax.set_xlim(-0.6, len(x) - 0.4)  # Add margins on both sides
+        ax.set_ylim(min_val - y_range * 0.05, max_height + y_range * 0.15)
+        
+        # Add grid and styling
+        ax.tick_params(axis='y', labelsize=14)
+        ax.grid(True, alpha=0.3, axis='y')
+        ax.set_facecolor('#fafafa')
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        
+        # Add a subtle shadow effect
+        for spine in ax.spines.values():
+            spine.set_linewidth(1.5)
+            spine.set_color('#cccccc')
+
+        # Improve layout with better margins
         plt.tight_layout()
-        plt.savefig(f'{input_path}/{score_type.replace(" ", "_").lower()}_score.png')
+        plt.subplots_adjust(bottom=0.12)  # Reduced space for rotated labels
+        plt.savefig(f'{input_path}/{score_type.replace(" ", "_").lower()}_score.png', 
+                   dpi=300, bbox_inches='tight', pad_inches=0.1)
+        plt.savefig(f'{input_path}/{score_type.replace(" ", "_").lower()}_score.pdf', 
+                   bbox_inches='tight', pad_inches=0.1)
         plt.close()
 
 

From 7816cb189c1641dd482240ac3a19225755565ac7 Mon Sep 17 00:00:00 2001
From: larskaesberg <larskaesberg@gmail.com>
Date: Thu, 3 Jul 2025 17:09:08 +0200
Subject: [PATCH 3/5] fix: better styling for plots

---
 mallm/evaluation/plotting/plots.py | 174 +++++++++++++++++++----------
 1 file changed, 116 insertions(+), 58 deletions(-)

diff --git a/mallm/evaluation/plotting/plots.py b/mallm/evaluation/plotting/plots.py
index ec4cda5f..5c0eb2b2 100644
--- a/mallm/evaluation/plotting/plots.py
+++ b/mallm/evaluation/plotting/plots.py
@@ -10,8 +10,8 @@
 from tqdm import tqdm
 
 # Set the style for beautiful plots
-plt.style.use('seaborn-v0_8-whitegrid')
-sns.set_palette("husl")
+plt.style.use('seaborn-v0_8-pastel')
+sns.set_palette("pastel")
 
 # Define a beautiful color palette
 COLORS = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8']
@@ -24,6 +24,22 @@ def get_colors(n_colors):
         # Use a colormap for more colors
         return plt.cm.Set3(np.linspace(0, 1, n_colors))
 
+
+def get_consistent_color_mapping(options):
+    """Map each distinct option name to a color, deterministically.
+
+    Options are de-duplicated and sorted so that the same set of names
+    always receives the same colors regardless of input order. Colors come
+    from ``get_colors``: the first entries of ``COLORS`` when there are
+    enough, otherwise evenly spaced samples of the ``Set3`` colormap.
+
+    Returns:
+        dict mapping option name -> color, in sorted option order.
+    """
+    unique_options = sorted(set(options))
+    return dict(zip(unique_options, get_colors(len(unique_options))))
+
+
 def process_eval_file(file_path: str) -> pd.DataFrame:
     data = json.loads(Path(file_path).read_text())
     return pd.DataFrame(data)
@@ -77,7 +93,7 @@ def aggregate_data(
     return eval_df, stats_df
 
 
-def plot_turns_with_std(df: pd.DataFrame, input_path: str) -> None:
+def plot_turns_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
     """Create a beautiful violin plot for turns distribution"""
     # Filter out rows with missing or invalid turns data
     df = df.dropna(subset=['turns'])
@@ -87,42 +103,68 @@ def plot_turns_with_std(df: pd.DataFrame, input_path: str) -> None:
         print("Warning: No valid turns data found. Skipping turns plot.")
         return
     
-    # Create combination labels for better grouping
-    df['condition'] = df['option'] + '_' + df['dataset']
-    unique_labels = get_unique_labels_from_conditions(df['condition'].unique())
+    # Create grouped data like other plots for consistent color assignment
+    grouped_data = df.groupby(['option', 'dataset']).agg({
+        'turns': list  # Keep all turns values for violin plot
+    }).reset_index()
+    
+    # Create unique labels like other plots
+    unique_labels = get_unique_labels(grouped_data)
+    grouped_data['label'] = unique_labels
+    
+    # Use global color mapping if provided, otherwise create local one
+    if global_color_mapping is None:
+        color_mapping = get_consistent_color_mapping(grouped_data['option'].unique())
+    else:
+        color_mapping = global_color_mapping
+    
+    # Create color palette based on option order in grouped data
+    colors = [color_mapping[option] for option in grouped_data['option']]
     
-    # Create a mapping from full condition to unique label
-    condition_to_label = dict(zip(df['condition'].unique(), unique_labels))
-    df['condition_label'] = df['condition'].map(condition_to_label)
+    # Expand the grouped data back to individual rows for violin plot
+    expanded_data = []
+    for i, row in grouped_data.iterrows():
+        for turn_value in row['turns']:
+            expanded_data.append({
+                'option': row['option'],
+                'dataset': row['dataset'], 
+                'label': row['label'],
+                'turns': turn_value
+            })
+    
+    plot_df = pd.DataFrame(expanded_data)
     
     plt.figure(figsize=(10, 4))
     
-    # Create violin plot with individual points
-    ax = sns.violinplot(data=df, x='condition_label', y='turns', 
-                       hue='condition_label', palette=get_colors(len(df['condition_label'].unique())), 
-                       inner=None, alpha=0.7, legend=False)
+    # Create violin plot with the same label order as other plots
+    ax = sns.violinplot(data=plot_df, x='label', y='turns', 
+                       order=grouped_data['label'], palette=colors, 
+                       inner=None, legend=False)
     
     # Add individual points with jitter
-    sns.stripplot(data=df, x='condition_label', y='turns', 
-                  color='white', size=6, alpha=0.8, edgecolor='black', linewidth=0.5)
+    sns.stripplot(data=plot_df, x='label', y='turns', 
+                  order=grouped_data['label'], color='white', size=6, 
+                  edgecolor='black', linewidth=0.5)
     
-    # Add red diamond mean markers that align correctly with violin plots
-    unique_conditions = df['condition_label'].unique()
+    # Set all plot elements above grid
+    for collection in ax.collections:
+        collection.set_zorder(4)
     
-    for i, condition in enumerate(unique_conditions):
-        mean_val = df[df['condition_label'] == condition]['turns'].mean()
+    # Add red diamond mean markers that align correctly with violin plots
+    for i, label in enumerate(grouped_data['label']):
+        mean_val = plot_df[plot_df['label'] == label]['turns'].mean()
         # Use red diamond markers positioned correctly
         ax.plot(i, mean_val, marker='D', color='red', markersize=8, 
-                markeredgecolor='white', markeredgewidth=1, zorder=10)
+                markeredgecolor='white', markeredgewidth=1, zorder=5)
     
     # Styling
     ax.set_xlabel('')  # Remove automatic seaborn x-axis label
-    ax.set_ylabel('Number of Turns', fontsize=14, fontweight='bold')
+    ax.set_ylabel('Number of Turns', fontsize=14)
     
     # Rotate labels and improve spacing
     plt.xticks(rotation=45, ha='right', fontsize=14)
     plt.yticks(fontsize=14)
-    plt.grid(True, alpha=0.3)
+    ax.grid(True, alpha=0.3, zorder=0)
     # Add a subtle background
     ax.set_facecolor('#fafafa')
     
@@ -133,7 +175,7 @@ def plot_turns_with_std(df: pd.DataFrame, input_path: str) -> None:
     plt.close()
 
 
-def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str) -> None:
+def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
     """Create a beautiful horizontal lollipop chart for clock seconds"""
     grouped = (
         df.groupby(["option", "dataset"])["clockSeconds"]
@@ -161,16 +203,22 @@ def sort_key(row):
     
     # Create discrete marker chart (no stems)
     y_pos = np.arange(len(grouped))
-    colors = get_colors(len(grouped))
+    
+    # Use global color mapping if provided, otherwise create local one
+    if global_color_mapping is None:
+        color_mapping = get_consistent_color_mapping(grouped['option'].unique())
+    else:
+        color_mapping = global_color_mapping
+    colors = [color_mapping[option] for option in grouped['option']]
     
     # Draw discrete circular markers only
     scatter = ax.scatter(grouped['mean'], y_pos, 
                         s=250, c=colors, 
-                        alpha=0.9, edgecolors='white', linewidth=3, zorder=10)
+                        edgecolors='white', linewidth=3, zorder=5)
     
-    # Add subtle error bars
+    # Add error bars
     ax.errorbar(grouped['mean'], y_pos, xerr=grouped['std'], 
-                fmt='none', color='gray', alpha=0.5, capsize=6, linewidth=2)
+                fmt='none', color='gray', capsize=6, linewidth=2, zorder=4)
     
     # Add value labels with better positioning to avoid circle overlap
     for i, (_, row) in enumerate(grouped.iterrows()):
@@ -178,13 +226,12 @@ def sort_key(row):
         offset = max(row['std'] + max(grouped['mean']) * 0.08, max(grouped['mean']) * 0.05)
         ax.text(row['mean'] + offset, i, 
                 f'{row["mean"]:.1f}s', 
-                va='center', ha='left', fontweight='bold', fontsize=14,
-                bbox=dict(boxstyle='round,pad=0.15', facecolor='white', alpha=0.8, edgecolor='none'))
+                va='center', ha='left', fontsize=14, zorder=6)
     
     # Styling
     ax.set_yticks(y_pos)
     ax.set_yticklabels(grouped['label'], fontsize=14)
-    ax.set_xlabel('Execution Time (seconds)', fontsize=14, fontweight='bold')
+    ax.set_xlabel('Execution Time (seconds)', fontsize=14)
     
     # Set x-axis limits with proper margins for labels
     max_val = max(grouped['mean'] + grouped['std'])
@@ -196,7 +243,7 @@ def sort_key(row):
     ax.spines['left'].set_color('#cccccc')
     ax.spines['bottom'].set_color('#cccccc')
     ax.tick_params(axis='x', labelsize=14)
-    ax.grid(True, alpha=0.3, axis='x', linestyle='-', linewidth=0.5)
+    ax.grid(True, alpha=0.3, axis='x', zorder=0)
     ax.set_facecolor('#fafafa')
     
     plt.tight_layout()
@@ -205,7 +252,7 @@ def sort_key(row):
     plt.close()
 
 
-def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
+def plot_decision_success_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
     """Create a beautiful horizontal bar chart for decision success rates"""
     if "decisionSuccess" not in df.columns:
         print(
@@ -234,18 +281,22 @@ def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
     
     fig, ax = plt.subplots(figsize=(10, 3))
     
-    # Create gradient colors based on success rate
-    colors = plt.cm.RdYlGn(grouped['mean'])
+    # Use global color mapping if provided, otherwise create local one
+    if global_color_mapping is None:
+        color_mapping = get_consistent_color_mapping(grouped['option'].unique())
+    else:
+        color_mapping = global_color_mapping
+    colors = [color_mapping[option] for option in grouped['option']]
     
     # Create horizontal bars
     bars = ax.barh(range(len(grouped)), grouped['mean'], 
-                   color=colors, alpha=0.8, height=0.6)
+                   color=colors, height=0.6, zorder=3)
     
     # Add percentage labels on bars
     for i, (_, row) in enumerate(grouped.iterrows()):
         percentage = row['mean'] * 100
         ax.text(row['mean'] + 0.02, i, f'{percentage:.1f}%', 
-                va='center', ha='left', fontweight='bold', fontsize=14)
+                va='center', ha='left', fontsize=14, zorder=6)
     
     # Add a subtle pattern to bars
     for bar, rate in zip(bars, grouped['mean']):
@@ -255,7 +306,7 @@ def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
     # Styling
     ax.set_yticks(range(len(grouped)))
     ax.set_yticklabels(grouped['label'], fontsize=14)
-    ax.set_xlabel('Decision Success Rate', fontsize=14, fontweight='bold')
+    ax.set_xlabel('Decision Success Rate', fontsize=14)
     ax.set_xlim(0, 1.1)
     
     # Add percentage ticks
@@ -265,7 +316,7 @@ def plot_decision_success_with_std(df: pd.DataFrame, input_path: str) -> None:
     # Remove spines and add grid
     ax.spines['top'].set_visible(False)
     ax.spines['right'].set_visible(False)
-    ax.grid(True, alpha=0.3, axis='x')
+    ax.grid(True, alpha=0.3, axis='x', zorder=0)
     ax.set_facecolor('#fafafa')
     
     plt.tight_layout()
@@ -355,7 +406,7 @@ def get_unique_labels_from_conditions(conditions) -> list[str]:
     return unique_labels
 
 
-def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str) -> None:
+def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
     """Create beautiful enhanced bar charts for score distributions"""
     print("Shape of stats_df:", df.shape)
     print("Columns in stats_df:", df.columns)
@@ -409,16 +460,21 @@ def sort_key(row):
             index=False,
         )
 
-        # Create beautiful bar plot with gradient colors
+        # Create beautiful bar plot with consistent colors
         x = np.arange(len(score_data))
-        colors = plt.cm.viridis(np.linspace(0, 1, len(score_data)))
+        
+        # Use global color mapping if provided, otherwise create local one
+        if global_color_mapping is None:
+            color_mapping = get_consistent_color_mapping(score_data['option'].unique())
+        else:
+            color_mapping = global_color_mapping
+        colors = [color_mapping[option] for option in score_data['option']]
         
         bars = ax.bar(x, score_data["mean"], 
                      yerr=score_data["std"],
                      capsize=8,
-                     color=colors, alpha=0.8,
-                     edgecolor='white', linewidth=2,
-                     width=0.6)  # Slightly narrower bars for more discrete look
+                     color=colors,
+                     width=0.6, zorder=3)  # Slightly narrower bars for more discrete look
 
         # Calculate proper y-axis limits
         max_height = max(score_data["mean"] + score_data["std"])
@@ -430,17 +486,10 @@ def sort_key(row):
             height = mean_val + std_val
             ax.text(bar.get_x() + bar.get_width()/2., height + y_range * 0.05,
                    f'{mean_val:.3f}', ha='center', va='bottom', 
-                   fontweight='bold', fontsize=14,
-                   bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))
-            
-            # Add gradient effect to bars
-            gradient = np.linspace(0, 1, 256).reshape(256, -1)
-            ax.imshow(gradient, extent=[bar.get_x(), bar.get_x() + bar.get_width(), 
-                                      0, bar.get_height()], 
-                     aspect='auto', alpha=0.3, cmap='viridis')
+                   fontsize=14, zorder=6)
 
         # Styling
-        ax.set_ylabel('Average Score', fontsize=14, fontweight='bold')
+        ax.set_ylabel('Average Score', fontsize=14)
         
         # Set x-axis with proper spacing and labels
         ax.set_xticks(x)
@@ -452,7 +501,7 @@ def sort_key(row):
         
         # Add grid and styling
         ax.tick_params(axis='y', labelsize=14)
-        ax.grid(True, alpha=0.3, axis='y')
+        ax.grid(True, alpha=0.3, axis='y', zorder=0)
         ax.set_facecolor('#fafafa')
         ax.spines['top'].set_visible(False)
         ax.spines['right'].set_visible(False)
@@ -481,22 +530,31 @@ def create_plots_for_path(input_dir_path: str, output_dir_path: str) -> None:
     print("First few rows of eval_df:")
     print(eval_df.head())
 
+    # Create global color mapping for all options across all plots
+    all_options = set()
+    if not eval_df.empty and 'option' in eval_df.columns:
+        all_options.update(eval_df['option'].unique())
+    if not stats_df.empty and 'option' in stats_df.columns:
+        all_options.update(stats_df['option'].unique())
+    
+    global_color_mapping = get_consistent_color_mapping(list(all_options))
+
     available_columns = eval_df.columns
 
     if "turns" in available_columns:
-        plot_turns_with_std(eval_df, output_dir_path)
+        plot_turns_with_std(eval_df, output_dir_path, global_color_mapping)
     else:
         print("Warning: 'turns' column not found. Skipping turns plot.")
 
     if "clockSeconds" in available_columns:
-        plot_clock_seconds_with_std(eval_df, output_dir_path)
+        plot_clock_seconds_with_std(eval_df, output_dir_path, global_color_mapping)
     else:
         print("Warning: 'clockSeconds' column not found. Skipping clock seconds plot.")
 
-    plot_decision_success_with_std(eval_df, output_dir_path)
+    plot_decision_success_with_std(eval_df, output_dir_path, global_color_mapping)
 
     if not stats_df.empty:
-        plot_score_distributions_with_std(stats_df, output_dir_path)
+        plot_score_distributions_with_std(stats_df, output_dir_path, global_color_mapping)
     else:
         print("Warning: No stats data available. Skipping score distributions plot.")
 

From f6c8006241475b193b20ea630339142b626fa476 Mon Sep 17 00:00:00 2001
From: larskaesberg <larskaesberg@gmail.com>
Date: Fri, 4 Jul 2025 15:13:29 +0200
Subject: [PATCH 4/5] fix: add seaborn

---
 poetry.lock    | 243 ++++++++++++++++++++++++++++++++++++++++++-------
 pyproject.toml |   1 +
 2 files changed, 210 insertions(+), 34 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 9d4a22c4..6a2fc274 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
 
 [[package]]
 name = "absl-py"
@@ -6,6 +6,7 @@ version = "2.1.0"
 description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py."
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "absl-py-2.1.0.tar.gz", hash = "sha256:7820790efbb316739cde8b4e19357243fc3608a152024288513dd968d7d959ff"},
     {file = "absl_py-2.1.0-py3-none-any.whl", hash = "sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308"},
@@ -17,6 +18,7 @@ version = "2.4.3"
 description = "Happy Eyeballs for asyncio"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"},
     {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"},
@@ -28,6 +30,7 @@ version = "3.11.7"
 description = "Async http client/server framework (asyncio)"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "aiohttp-3.11.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8bedb1f6cb919af3b6353921c71281b1491f948ca64408871465d889b4ee1b66"},
     {file = "aiohttp-3.11.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f5022504adab881e2d801a88b748ea63f2a9d130e0b2c430824682a96f6534be"},
@@ -118,7 +121,7 @@ propcache = ">=0.2.0"
 yarl = ">=1.17.0,<2.0"
 
 [package.extras]
-speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"]
+speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
 
 [[package]]
 name = "aiosignal"
@@ -126,6 +129,7 @@ version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
     {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
@@ -140,6 +144,7 @@ version = "0.7.0"
 description = "Reusable constraint types to use with typing.Annotated"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
     {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
@@ -151,6 +156,7 @@ version = "4.6.2.post1"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"},
     {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"},
@@ -164,7 +170,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
 
 [package.extras]
 doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
+test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21.0b1) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""]
 trio = ["trio (>=0.26.1)"]
 
 [[package]]
@@ -173,6 +179,8 @@ version = "4.0.3"
 description = "Timeout context manager for asyncio programs"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
+markers = "python_version < \"3.11\""
 files = [
     {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
     {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@@ -184,18 +192,19 @@ version = "24.2.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"},
     {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"},
 ]
 
 [package.extras]
-benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"]
+cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"]
+dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"]
 docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
-tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
+tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\""]
 
 [[package]]
 name = "beautifulsoup4"
@@ -203,6 +212,7 @@ version = "4.12.3"
 description = "Screen-scraping library"
 optional = false
 python-versions = ">=3.6.0"
+groups = ["main"]
 files = [
     {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
     {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
@@ -224,6 +234,7 @@ version = "0.3.13"
 description = "PyTorch implementation of BERT score"
 optional = false
 python-versions = ">=3.6"
+groups = ["main"]
 files = [
     {file = "bert_score-0.3.13-py3-none-any.whl", hash = "sha256:bbbb4c7fcdaa46d7681aff49f37f96faa09ed74e1b150e659bdc6b58a66989b9"},
     {file = "bert_score-0.3.13.tar.gz", hash = "sha256:8ffe5838eac8cdd988b8b1a896af7f49071188c8c011a1ed160d71a9899a2ba4"},
@@ -245,6 +256,7 @@ version = "2024.8.30"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
+groups = ["main"]
 files = [
     {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"},
     {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"},
@@ -256,6 +268,7 @@ version = "1.17.1"
 description = "Foreign Function Interface for Python calling C code."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
     {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
@@ -335,6 +348,7 @@ version = "3.4.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
 python-versions = ">=3.7.0"
+groups = ["main"]
 files = [
     {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"},
     {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"},
@@ -449,6 +463,7 @@ version = "8.1.7"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
     {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
@@ -463,6 +478,7 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["main", "dev"]
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@@ -474,6 +490,7 @@ version = "0.1.4"
 description = "Empowering Conversations with Real-Time Facts"
 optional = false
 python-versions = "<4.0,>=3.9"
+groups = ["main"]
 files = [
     {file = "contextplus-0.1.4-py3-none-any.whl", hash = "sha256:c75af0e4fb03d0a1c36a88100de5097f2f3f333a8a54a8ea86e9f069feb12d06"},
     {file = "contextplus-0.1.4.tar.gz", hash = "sha256:4c9168633e4895469713cb732ca35fb1fee9caa11cc447a367b32bee873f0713"},
@@ -490,6 +507,7 @@ version = "1.3.0"
 description = "Python library for calculating contours of 2D quadrilateral grids"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"},
     {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"},
@@ -574,6 +592,7 @@ version = "7.6.7"
 description = "Code coverage measurement for Python"
 optional = false
 python-versions = ">=3.9"
+groups = ["dev"]
 files = [
     {file = "coverage-7.6.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:108bb458827765d538abcbf8288599fee07d2743357bdd9b9dad456c287e121e"},
     {file = "coverage-7.6.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c973b2fe4dc445cb865ab369df7521df9c27bf40715c837a113edaa2aa9faf45"},
@@ -640,7 +659,7 @@ files = [
 ]
 
 [package.extras]
-toml = ["tomli"]
+toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
 
 [[package]]
 name = "coverage-badge"
@@ -648,6 +667,7 @@ version = "1.1.2"
 description = "Generate coverage badges for Coverage.py."
 optional = false
 python-versions = "*"
+groups = ["dev"]
 files = [
     {file = "coverage_badge-1.1.2-py2.py3-none-any.whl", hash = "sha256:d8413ce51c91043a1692b943616b450868cbeeb0ea6a0c54a32f8318c9c96ff7"},
     {file = "coverage_badge-1.1.2.tar.gz", hash = "sha256:fe7ed58a3b72dad85a553b64a99e963dea3847dcd0b8ddd2b38a00333618642c"},
@@ -663,6 +683,7 @@ version = "0.12.1"
 description = "Composable style cycles"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"},
     {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"},
@@ -678,6 +699,7 @@ version = "0.6.7"
 description = "Easily serialize dataclasses to and from JSON."
 optional = false
 python-versions = "<4.0,>=3.7"
+groups = ["main"]
 files = [
     {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"},
     {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"},
@@ -693,6 +715,7 @@ version = "2.21.0"
 description = "HuggingFace community-driven open-source library of datasets"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "datasets-2.21.0-py3-none-any.whl", hash = "sha256:25e4e097110ce28824b746a107727ada94024cba11db8bc588d468414692b65a"},
     {file = "datasets-2.21.0.tar.gz", hash = "sha256:998f85a8460f1bd982e5bd058f8a0808eef424249e3df1e8cdd594ccd0dc8ba2"},
@@ -716,9 +739,9 @@ xxhash = "*"
 
 [package.extras]
 apache-beam = ["apache-beam (>=2.26.0)"]
-audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"]
+audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\""]
 benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"]
+dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"]
 docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"]
 jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
 metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk (<3.8.2)", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
@@ -726,8 +749,8 @@ quality = ["ruff (>=0.3.0)"]
 s3 = ["s3fs"]
 tensorflow = ["tensorflow (>=2.6.0)"]
 tensorflow-gpu = ["tensorflow (>=2.6.0)"]
-tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"]
-tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "typing-extensions (>=4.6.1)", "zstandard"]
+tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"]
+tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "typing-extensions (>=4.6.1)", "zstandard"]
 torch = ["torch"]
 vision = ["Pillow (>=9.4.0)"]
 
@@ -737,6 +760,7 @@ version = "0.3.8"
 description = "serialize all of Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"},
     {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"},
@@ -752,6 +776,7 @@ version = "1.9.0"
 description = "Distro - an OS platform information API"
 optional = false
 python-versions = ">=3.6"
+groups = ["main"]
 files = [
     {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
     {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
@@ -763,6 +788,7 @@ version = "0.4.3"
 description = "HuggingFace community-driven open-source library of evaluation"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "evaluate-0.4.3-py3-none-any.whl", hash = "sha256:47d8770bdea76e2c2ed0d40189273027d1a41ccea861bcc7ba12d30ec5d1e517"},
     {file = "evaluate-0.4.3.tar.gz", hash = "sha256:3a5700cf83aabee9549264e1e5666f116367c61dbd4d38352015e859a5e2098d"},
@@ -798,6 +824,8 @@ version = "1.2.2"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev"]
+markers = "python_version < \"3.11\""
 files = [
     {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
     {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@@ -812,6 +840,7 @@ version = "3.16.1"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"},
     {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"},
@@ -820,7 +849,7 @@ files = [
 [package.extras]
 docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"]
 testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"]
-typing = ["typing-extensions (>=4.12.2)"]
+typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""]
 
 [[package]]
 name = "fire"
@@ -828,6 +857,7 @@ version = "0.6.0"
 description = "A library for automatically generating command line interfaces."
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "fire-0.6.0.tar.gz", hash = "sha256:54ec5b996ecdd3c0309c800324a0703d6da512241bc73b553db959d98de0aa66"},
 ]
@@ -842,6 +872,7 @@ version = "7.1.1"
 description = "the modular source code checker: pep8 pyflakes and co"
 optional = false
 python-versions = ">=3.8.1"
+groups = ["dev"]
 files = [
     {file = "flake8-7.1.1-py2.py3-none-any.whl", hash = "sha256:597477df7860daa5aa0fdd84bf5208a043ab96b8e96ab708770ae0364dd03213"},
     {file = "flake8-7.1.1.tar.gz", hash = "sha256:049d058491e228e03e67b390f311bbf88fce2dbaa8fa673e7aea87b7198b8d38"},
@@ -858,6 +889,7 @@ version = "4.55.0"
 description = "Tools to manipulate font files"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "fonttools-4.55.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:51c029d4c0608a21a3d3d169dfc3fb776fde38f00b35ca11fdab63ba10a16f61"},
     {file = "fonttools-4.55.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bca35b4e411362feab28e576ea10f11268b1aeed883b9f22ed05675b1e06ac69"},
@@ -912,18 +944,18 @@ files = [
 ]
 
 [package.extras]
-all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"]
+all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"]
 graphite = ["lz4 (>=1.7.4.2)"]
-interpolatable = ["munkres", "pycairo", "scipy"]
+interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""]
 lxml = ["lxml (>=4.0)"]
 pathops = ["skia-pathops (>=0.5.0)"]
 plot = ["matplotlib"]
 repacker = ["uharfbuzz (>=0.23.0)"]
 symfont = ["sympy"]
-type1 = ["xattr"]
+type1 = ["xattr ; sys_platform == \"darwin\""]
 ufo = ["fs (>=2.2.0,<3)"]
-unicode = ["unicodedata2 (>=15.1.0)"]
-woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
+unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""]
+woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"]
 
 [[package]]
 name = "frozenlist"
@@ -931,6 +963,7 @@ version = "1.5.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"},
     {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"},
@@ -1032,6 +1065,7 @@ version = "2024.6.1"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"},
     {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"},
@@ -1074,6 +1108,8 @@ version = "3.1.1"
 description = "Lightweight in-process concurrent programming"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
+markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"
 files = [
     {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"},
     {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"},
@@ -1160,6 +1196,7 @@ version = "0.14.0"
 description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
     {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
@@ -1171,6 +1208,7 @@ version = "1.0.7"
 description = "A minimal low-level HTTP client."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
     {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
@@ -1192,6 +1230,7 @@ version = "0.27.2"
 description = "The next generation HTTP client."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
     {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
@@ -1205,7 +1244,7 @@ idna = "*"
 sniffio = "*"
 
 [package.extras]
-brotli = ["brotli", "brotlicffi"]
+brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
 cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
 http2 = ["h2 (>=3,<5)"]
 socks = ["socksio (==1.*)"]
@@ -1217,6 +1256,7 @@ version = "0.23.0"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "huggingface_hub-0.23.0-py3-none-any.whl", hash = "sha256:075c30d48ee7db2bba779190dc526d2c11d422aed6f9044c5e2fdc2c432fdb91"},
     {file = "huggingface_hub-0.23.0.tar.gz", hash = "sha256:7126dedd10a4c6fac796ced4d87a8cf004efc722a5125c2c09299017fa366fa9"},
@@ -1251,6 +1291,7 @@ version = "3.10"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.6"
+groups = ["main"]
 files = [
     {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
     {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@@ -1265,6 +1306,7 @@ version = "4.2.1"
 description = "Immutable wrapper around dictionaries (a fork of frozendict)"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "immutabledict-4.2.1-py3-none-any.whl", hash = "sha256:c56a26ced38c236f79e74af3ccce53772827cef5c3bce7cab33ff2060f756373"},
     {file = "immutabledict-4.2.1.tar.gz", hash = "sha256:d91017248981c72eb66c8ff9834e99c2f53562346f23e7f51e7a5ebcf66a3bcc"},
@@ -1276,6 +1318,8 @@ version = "6.4.5"
 description = "Read resources from Python packages"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
+markers = "python_version == \"3.9\""
 files = [
     {file = "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"},
     {file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"},
@@ -1285,7 +1329,7 @@ files = [
 zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
 
 [package.extras]
-check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""]
 cover = ["pytest-cov"]
 doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
 enabler = ["pytest-enabler (>=2.2)"]
@@ -1298,6 +1342,7 @@ version = "2.0.0"
 description = "brain-dead simple config-ini parsing"
 optional = false
 python-versions = ">=3.7"
+groups = ["dev"]
 files = [
     {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
@@ -1309,6 +1354,7 @@ version = "3.1.4"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
     {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
@@ -1326,6 +1372,7 @@ version = "0.7.1"
 description = "Fast iterable JSON parser."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "jiter-0.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:262e96d06696b673fad6f257e6a0abb6e873dc22818ca0e0600f4a1189eb334f"},
     {file = "jiter-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be6de02939aac5be97eb437f45cfd279b1dc9de358b13ea6e040e63a3221c40d"},
@@ -1408,6 +1455,7 @@ version = "1.4.2"
 description = "Lightweight pipelining with Python functions"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"},
     {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
@@ -1419,6 +1467,7 @@ version = "0.25.3"
 description = "A package to repair broken json strings"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "json_repair-0.25.3-py3-none-any.whl", hash = "sha256:f00b510dd21b31ebe72581bdb07e66381df2883d6f640c89605e482882c12b17"},
     {file = "json_repair-0.25.3.tar.gz", hash = "sha256:4ee970581a05b0b258b749eb8bcac21de380edda97c3717a4edfafc519ec21a4"},
@@ -1430,6 +1479,7 @@ version = "1.33"
 description = "Apply JSON-Patches (RFC 6902)"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
+groups = ["main"]
 files = [
     {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"},
     {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"},
@@ -1444,6 +1494,7 @@ version = "3.0.0"
 description = "Identify specific nodes in a JSON document (RFC 6901)"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"},
     {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"},
@@ -1455,6 +1506,7 @@ version = "1.4.7"
 description = "A fast implementation of the Cassowary constraint solver"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"},
     {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"},
@@ -1578,6 +1630,7 @@ version = "0.1.20"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.8.1"
+groups = ["main"]
 files = [
     {file = "langchain-0.1.20-py3-none-any.whl", hash = "sha256:09991999fbd6c3421a12db3c7d1f52d55601fc41d9b2a3ef51aab2e0e9c38da9"},
     {file = "langchain-0.1.20.tar.gz", hash = "sha256:f35c95eed8c8375e02dce95a34f2fd4856a4c98269d6dc34547a23dba5beab7e"},
@@ -1605,11 +1658,11 @@ cli = ["typer (>=0.9.0,<0.10.0)"]
 cohere = ["cohere (>=4,<6)"]
 docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
 embeddings = ["sentence-transformers (>=2,<3)"]
-extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<6)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "langchain-openai (>=0.0.2,<0.1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata 
(>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<6)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "langchain-openai (>=0.0.2,<0.1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0) ; python_full_version >= \"3.8.1\" and python_version < \"3.12\"", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0) ; python_full_version >= \"3.8.1\" and python_full_version != \"3.9.7\" and python_version < 
\"4.0\"", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
 javascript = ["esprima (>=4.0.1,<5.0.0)"]
 llms = ["clarifai (>=9.1.0)", "cohere (>=4,<6)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
-openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"]
-qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
+openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0) ; python_version >= \"3.9\""]
+qdrant = ["qdrant-client (>=1.3.1,<2.0.0) ; python_full_version >= \"3.8.1\" and python_version < \"3.12\""]
 text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
 
 [[package]]
@@ -1618,6 +1671,7 @@ version = "0.0.38"
 description = "Community contributed LangChain integrations."
 optional = false
 python-versions = "<4.0,>=3.8.1"
+groups = ["main"]
 files = [
     {file = "langchain_community-0.0.38-py3-none-any.whl", hash = "sha256:ecb48660a70a08c90229be46b0cc5f6bc9f38f2833ee44c57dfab9bf3a2c121a"},
     {file = "langchain_community-0.0.38.tar.gz", hash = "sha256:127fc4b75bc67b62fe827c66c02e715a730fef8fe69bd2023d466bab06b5810d"},
@@ -1636,7 +1690,7 @@ tenacity = ">=8.1.0,<9.0.0"
 
 [package.extras]
 cli = ["typer (>=0.9.0,<0.10.0)"]
-extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "azure-identity (>=1.15.0,<2.0.0)", "azure-search-documents (==11.4.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.6,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cloudpickle (>=2.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "friendli-client (>=1.2.4,<2.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "httpx-sse (>=0.4.0,<0.5.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "oracledb (>=2.2.0,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "premai (>=0.3.25,<0.4.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pyjwt (>=2.8.0,<3.0.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 
(>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tidb-vector (>=0.0.3,<1.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "vdms (>=0.0.20,<0.0.21)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "azure-identity (>=1.15.0,<2.0.0)", "azure-search-documents (==11.4.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.6,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cloudpickle (>=2.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "friendli-client (>=1.2.4,<2.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "httpx-sse (>=0.4.0,<0.5.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "oracledb (>=2.2.0,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "premai (>=0.3.25,<0.4.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pyjwt (>=2.8.0,<3.0.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 
(>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0) ; python_full_version >= \"3.8.1\" and python_version < \"3.12\"", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0) ; python_full_version >= \"3.8.1\" and python_full_version != \"3.9.7\" and python_version < \"4.0\"", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tidb-vector (>=0.0.3,<1.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "vdms (>=0.0.20,<0.0.21)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
 
 [[package]]
 name = "langchain-core"
@@ -1644,6 +1698,7 @@ version = "0.1.53"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.8.1"
+groups = ["main"]
 files = [
     {file = "langchain_core-0.1.53-py3-none-any.whl", hash = "sha256:02a88a21e3bd294441b5b741625fa4b53b1c684fd58ba6e5d9028e53cbe8542f"},
     {file = "langchain_core-0.1.53.tar.gz", hash = "sha256:df3773a553b5335eb645827b99a61a7018cea4b11dc45efa2613fde156441cec"},
@@ -1666,6 +1721,7 @@ version = "0.0.2"
 description = "LangChain text splitting utilities"
 optional = false
 python-versions = "<4.0,>=3.8.1"
+groups = ["main"]
 files = [
     {file = "langchain_text_splitters-0.0.2-py3-none-any.whl", hash = "sha256:13887f32705862c1e1454213cb7834a63aae57c26fcd80346703a1d09c46168d"},
     {file = "langchain_text_splitters-0.0.2.tar.gz", hash = "sha256:ac8927dc0ba08eba702f6961c9ed7df7cead8de19a9f7101ab2b5ea34201b3c1"},
@@ -1683,6 +1739,7 @@ version = "1.0.9"
 description = "Language detection library ported from Google's language-detection."
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"},
     {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"},
@@ -1697,6 +1754,7 @@ version = "0.1.144"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 optional = false
 python-versions = "<4.0,>=3.8.1"
+groups = ["main"]
 files = [
     {file = "langsmith-0.1.144-py3-none-any.whl", hash = "sha256:08ffb975bff2e82fc6f5428837c64c074ea25102d08a25e256361a80812c6100"},
     {file = "langsmith-0.1.144.tar.gz", hash = "sha256:b621f358d5a33441d7b5e7264c376bf4ea82bfc62d7e41aafc0f8094e3bd6369"},
@@ -1718,6 +1776,7 @@ version = "3.0.0"
 description = "Python port of markdown-it. Markdown parsing, done right!"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
     {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
@@ -1742,6 +1801,7 @@ version = "3.0.2"
 description = "Safely add untrusted strings to HTML/XML markup."
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"},
     {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"},
@@ -1812,6 +1872,7 @@ version = "3.23.1"
 description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "marshmallow-3.23.1-py3-none-any.whl", hash = "sha256:fece2eb2c941180ea1b7fcbd4a83c51bfdd50093fdd3ad2585ee5e1df2508491"},
     {file = "marshmallow-3.23.1.tar.gz", hash = "sha256:3a8dfda6edd8dcdbf216c0ede1d1e78d230a6dc9c5a088f58c4083b974a0d468"},
@@ -1831,6 +1892,7 @@ version = "3.9.2"
 description = "Python plotting package"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "matplotlib-3.9.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb"},
     {file = "matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4"},
@@ -1895,6 +1957,7 @@ version = "0.7.0"
 description = "McCabe checker, plugin for flake8"
 optional = false
 python-versions = ">=3.6"
+groups = ["dev"]
 files = [
     {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
     {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
@@ -1906,6 +1969,7 @@ version = "0.1.2"
 description = "Markdown URL utilities"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
     {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
@@ -1917,6 +1981,7 @@ version = "1.3.0"
 description = "Python library for arbitrary-precision floating-point arithmetic"
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
     {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
@@ -1925,7 +1990,7 @@ files = [
 [package.extras]
 develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
 docs = ["sphinx"]
-gmpy = ["gmpy2 (>=2.1.0a4)"]
+gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""]
 tests = ["pytest (>=4.6)"]
 
 [[package]]
@@ -1934,6 +1999,7 @@ version = "6.1.0"
 description = "multidict implementation"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"},
     {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"},
@@ -2038,6 +2104,7 @@ version = "0.70.16"
 description = "better multiprocessing and multithreading in Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"},
     {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"},
@@ -2062,6 +2129,7 @@ version = "1.13.0"
 description = "Optional static typing for Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"},
     {file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"},
@@ -2115,6 +2183,7 @@ version = "1.0.0"
 description = "Type system extensions for programs checked with the mypy type checker."
 optional = false
 python-versions = ">=3.5"
+groups = ["main", "dev"]
 files = [
     {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
     {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
@@ -2126,6 +2195,7 @@ version = "3.2.1"
 description = "Python package for creating and manipulating graphs and networks"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"},
     {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"},
@@ -2144,6 +2214,7 @@ version = "3.9.1"
 description = "Natural Language Toolkit"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"},
     {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"},
@@ -2169,6 +2240,7 @@ version = "1.26.4"
 description = "Fundamental package for array computing in Python"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
     {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@@ -2214,6 +2286,8 @@ version = "12.4.5.8"
 description = "CUBLAS native runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3"},
     {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b"},
@@ -2226,6 +2300,8 @@ version = "12.4.127"
 description = "CUDA profiling tools runtime libs."
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a"},
     {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb"},
@@ -2238,6 +2314,8 @@ version = "12.4.127"
 description = "NVRTC native runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198"},
     {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338"},
@@ -2250,6 +2328,8 @@ version = "12.4.127"
 description = "CUDA Runtime native Libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3"},
     {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5"},
@@ -2262,6 +2342,8 @@ version = "9.1.0.70"
 description = "cuDNN runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"},
     {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"},
@@ -2276,6 +2358,8 @@ version = "11.2.1.3"
 description = "CUFFT native runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399"},
     {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9"},
@@ -2291,6 +2375,8 @@ version = "10.3.5.147"
 description = "CURAND native runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9"},
     {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b"},
@@ -2303,6 +2389,8 @@ version = "11.6.1.9"
 description = "CUDA solver native runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e"},
     {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260"},
@@ -2320,6 +2408,8 @@ version = "12.3.1.170"
 description = "CUSPARSE native runtime libraries"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3"},
     {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1"},
@@ -2335,6 +2425,8 @@ version = "2.21.5"
 description = "NVIDIA Collective Communication Library (NCCL) Runtime"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0"},
 ]
@@ -2345,6 +2437,8 @@ version = "12.4.127"
 description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"},
     {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"},
@@ -2357,6 +2451,8 @@ version = "12.4.127"
 description = "NVIDIA Tools Extension"
 optional = false
 python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3"},
     {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a"},
@@ -2369,6 +2465,7 @@ version = "1.55.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "openai-1.55.0-py3-none-any.whl", hash = "sha256:446e08918f8dd70d8723274be860404c8c7cc46b91b93bbc0ef051f57eb503c1"},
     {file = "openai-1.55.0.tar.gz", hash = "sha256:6c0975ac8540fe639d12b4ff5a8e0bf1424c844c4a4251148f59f06c4b2bd5db"},
@@ -2393,6 +2490,8 @@ version = "3.10.11"
 description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
+markers = "platform_python_implementation != \"PyPy\""
 files = [
     {file = "orjson-3.10.11-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6dade64687f2bd7c090281652fe18f1151292d567a9302b34c2dbb92a3872f1f"},
     {file = "orjson-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82f07c550a6ccd2b9290849b22316a609023ed851a87ea888c0456485a7d196a"},
@@ -2460,6 +2559,7 @@ version = "23.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev"]
 files = [
     {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
     {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
@@ -2471,6 +2571,7 @@ version = "2.2.3"
 description = "Powerful data structures for data analysis, time series, and statistics"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"},
     {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"},
@@ -2557,6 +2658,7 @@ version = "11.0.0"
 description = "Python Imaging Library (Fork)"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"},
     {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"},
@@ -2640,7 +2742,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline
 fpx = ["olefile"]
 mic = ["olefile"]
 tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
-typing = ["typing-extensions"]
+typing = ["typing-extensions ; python_version < \"3.10\""]
 xmp = ["defusedxml"]
 
 [[package]]
@@ -2649,6 +2751,7 @@ version = "1.5.0"
 description = "plugin and hook calling mechanisms for python"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
     {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
@@ -2664,6 +2767,7 @@ version = "0.2.0"
 description = "Accelerated property cache"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"},
     {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"},
@@ -2771,6 +2875,7 @@ version = "18.0.0"
 description = "Python library for Apache Arrow"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"},
     {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"},
@@ -2825,6 +2930,7 @@ version = "2.12.1"
 description = "Python style guide checker"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"},
     {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"},
@@ -2836,6 +2942,7 @@ version = "2.22"
 description = "C parser in Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
     {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
@@ -2847,6 +2954,7 @@ version = "2.10.1"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "pydantic-2.10.1-py3-none-any.whl", hash = "sha256:a8d20db84de64cf4a7d59e899c2caf0fe9d660c7cfc482528e7020d7dd189a7e"},
     {file = "pydantic-2.10.1.tar.gz", hash = "sha256:a4daca2dc0aa429555e0656d6bf94873a7dc5f54ee42b1f5873d666fb3f35560"},
@@ -2859,7 +2967,7 @@ typing-extensions = ">=4.12.2"
 
 [package.extras]
 email = ["email-validator (>=2.0.0)"]
-timezone = ["tzdata"]
+timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""]
 
 [[package]]
 name = "pydantic-core"
@@ -2867,6 +2975,7 @@ version = "2.27.1"
 description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "pydantic_core-2.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71a5e35c75c021aaf400ac048dacc855f000bdfed91614b4a726f7432f1f3d6a"},
     {file = "pydantic_core-2.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b"},
@@ -2979,6 +3088,7 @@ version = "3.2.0"
 description = "passive checker of Python programs"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"},
     {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"},
@@ -2990,6 +3100,7 @@ version = "2.18.0"
 description = "Pygments is a syntax highlighting package written in Python."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"},
     {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"},
@@ -3004,6 +3115,7 @@ version = "3.2.0"
 description = "pyparsing module - Classes and methods to define and execute parsing grammars"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"},
     {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"},
@@ -3018,6 +3130,7 @@ version = "8.3.3"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"},
     {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"},
@@ -3040,6 +3153,7 @@ version = "2.9.0.post0"
 description = "Extensions to the standard Python datetime module"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main"]
 files = [
     {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
     {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
@@ -3054,6 +3168,7 @@ version = "2024.2"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"},
     {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"},
@@ -3065,6 +3180,7 @@ version = "6.0.2"
 description = "YAML parser and emitter for Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
     {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
@@ -3127,6 +3243,7 @@ version = "2024.11.6"
 description = "Alternative regular expression module, to replace re."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"},
     {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"},
@@ -3230,6 +3347,7 @@ version = "2.32.3"
 description = "Python HTTP for Humans."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
     {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@@ -3251,6 +3369,7 @@ version = "1.0.0"
 description = "A utility belt for advanced users of python-requests"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["main"]
 files = [
     {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
     {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"},
@@ -3265,6 +3384,7 @@ version = "13.9.4"
 description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"},
     {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"},
@@ -3284,6 +3404,7 @@ version = "0.1.2"
 description = "Pure python implementation of ROUGE-1.5.5."
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04"},
 ]
@@ -3300,6 +3421,7 @@ version = "0.4.5"
 description = ""
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7"},
     {file = "safetensors-0.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27"},
@@ -3432,6 +3554,7 @@ version = "1.5.2"
 description = "A set of python modules for machine learning and data mining"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"},
     {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"},
@@ -3482,6 +3605,7 @@ version = "1.13.1"
 description = "Fundamental algorithms for scientific computing in Python"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"},
     {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"},
@@ -3518,12 +3642,35 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pyde
 doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"]
 test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
 
+[[package]]
+name = "seaborn"
+version = "0.13.2"
+description = "Statistical data visualization"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"},
+    {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"},
+]
+
+[package.dependencies]
+matplotlib = ">=3.4,<3.6.1 || >3.6.1"
+numpy = ">=1.20,<1.24.0 || >1.24.0"
+pandas = ">=1.2"
+
+[package.extras]
+dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"]
+docs = ["ipykernel", "nbconvert", "numpydoc", "pydata_sphinx_theme (==0.10.0rc2)", "pyyaml", "sphinx (<6.0.0)", "sphinx-copybutton", "sphinx-design", "sphinx-issues"]
+stats = ["scipy (>=1.7)", "statsmodels (>=0.12)"]
+
 [[package]]
 name = "sentence-transformers"
 version = "2.7.0"
 description = "Multilingual text embeddings"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "sentence_transformers-2.7.0-py3-none-any.whl", hash = "sha256:6a7276b05a95931581bbfa4ba49d780b2cf6904fa4a171ec7fd66c343f761c98"},
     {file = "sentence_transformers-2.7.0.tar.gz", hash = "sha256:2f7df99d1c021dded471ed2d079e9d1e4fc8e30ecb06f957be060511b36f24ea"},
@@ -3548,14 +3695,16 @@ version = "70.3.0"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"},
     {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"},
 ]
+markers = {main = "python_version >= \"3.12\""}
 
 [package.extras]
 doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.3.2) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
 
 [[package]]
 name = "six"
@@ -3563,6 +3712,7 @@ version = "1.16.0"
 description = "Python 2 and 3 compatibility utilities"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+groups = ["main"]
 files = [
     {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
     {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
@@ -3574,6 +3724,7 @@ version = "1.3.1"
 description = "Sniff out which async library your code is running under"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -3585,6 +3736,7 @@ version = "2.6"
 description = "A modern CSS selector implementation for Beautiful Soup."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
     {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
@@ -3596,6 +3748,7 @@ version = "2.0.36"
 description = "Database Abstraction Library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72"},
     {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908"},
@@ -3691,6 +3844,7 @@ version = "1.13.1"
 description = "Computer algebra system (CAS) in Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8"},
     {file = "sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f"},
@@ -3708,6 +3862,7 @@ version = "8.5.0"
 description = "Retry code until it succeeds"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"},
     {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"},
@@ -3723,6 +3878,7 @@ version = "2.5.0"
 description = "ANSI color formatting for output in terminal"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"},
     {file = "termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"},
@@ -3737,6 +3893,7 @@ version = "3.5.0"
 description = "threadpoolctl"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"},
     {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
@@ -3748,6 +3905,7 @@ version = "0.19.1"
 description = ""
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"},
     {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"},
@@ -3865,6 +4023,8 @@ version = "2.1.0"
 description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
+markers = "python_version < \"3.11\""
 files = [
     {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"},
     {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"},
@@ -3876,6 +4036,7 @@ version = "2.5.1"
 description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:71328e1bbe39d213b8721678f9dcac30dfc452a46d586f1d514a6aa0a99d4744"},
     {file = "torch-2.5.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:34bfa1a852e5714cbfa17f27c49d8ce35e1b7af5608c4bc6e81392c352dbc601"},
@@ -3928,6 +4089,7 @@ version = "4.67.0"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev"]
 files = [
     {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"},
     {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"},
@@ -3949,6 +4111,7 @@ version = "4.41.2"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.8.0"
+groups = ["main"]
 files = [
     {file = "transformers-4.41.2-py3-none-any.whl", hash = "sha256:05555d20e43f808de1ef211ab64803cdb513170cef70d29a888b589caebefc67"},
     {file = "transformers-4.41.2.tar.gz", hash = "sha256:80a4db216533d573e9cc7388646c31ed9480918feb7c55eb211249cb23567f87"},
@@ -4015,6 +4178,8 @@ version = "3.1.0"
 description = "A language and compiler for custom Deep Learning operations"
 optional = false
 python-versions = "*"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""
 files = [
     {file = "triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8"},
     {file = "triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f34f6e7885d1bf0eaaf7ba875a5f0ce6f3c13ba98f9503651c1e6dc6757ed5c"},
@@ -4037,6 +4202,7 @@ version = "2.32.0.20241016"
 description = "Typing stubs for requests"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"},
     {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"},
@@ -4051,6 +4217,7 @@ version = "4.12.2"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
@@ -4062,6 +4229,7 @@ version = "0.9.0"
 description = "Runtime inspection utilities for typing module."
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"},
     {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"},
@@ -4077,6 +4245,7 @@ version = "2024.2"
 description = "Provider of IANA time zone data"
 optional = false
 python-versions = ">=2"
+groups = ["main"]
 files = [
     {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"},
     {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"},
@@ -4088,13 +4257,14 @@ version = "2.2.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
     {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"},
 ]
 
 [package.extras]
-brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
 h2 = ["h2 (>=4,<5)"]
 socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
 zstd = ["zstandard (>=0.18.0)"]
@@ -4105,6 +4275,7 @@ version = "1.4.0"
 description = "Wikipedia API for Python"
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2"},
 ]
@@ -4119,6 +4290,7 @@ version = "3.5.0"
 description = "Python binding for xxHash"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"},
     {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"},
@@ -4251,6 +4423,7 @@ version = "1.18.0"
 description = "Yet another URL library"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "yarl-1.18.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:074fee89caab89a97e18ef5f29060ef61ba3cae6cd77673acc54bfdd3214b7b7"},
     {file = "yarl-1.18.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b026cf2c32daf48d90c0c4e406815c3f8f4cfe0c6dfccb094a9add1ff6a0e41a"},
@@ -4347,20 +4520,22 @@ version = "3.21.0"
 description = "Backport of pathlib-compatible object wrapper for zip files"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
+markers = "python_version == \"3.9\""
 files = [
     {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"},
     {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"},
 ]
 
 [package.extras]
-check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""]
 cover = ["pytest-cov"]
 doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
 enabler = ["pytest-enabler (>=2.2)"]
-test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"]
+test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"]
 type = ["pytest-mypy"]
 
 [metadata]
-lock-version = "2.0"
+lock-version = "2.1"
 python-versions = "^3.9"
-content-hash = "4cc4b9fe2f41be698f87441ff920a806e9c3ae0d02da0612b0087ed2cb713169"
+content-hash = "e25e722ff2e22a65a9c2f1541cbc2c40809555fbbb418c3b32dc1c441586bbfe"
diff --git a/pyproject.toml b/pyproject.toml
index 92b18249..6acdf0f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,7 @@ rich = "^13.7.1"
 contextplus = "^0.1.3"
 langdetect = "^1.0.9"
 immutabledict = "^4.2.0"
+seaborn = "^0.13.2"
 
 [tool.poetry.group.dev.dependencies]
 tqdm = "^4.66.2"

From 13d8088690dce38281ba47444ef0e30cf1dfc3e7 Mon Sep 17 00:00:00 2001
From: larskaesberg <larskaesberg@gmail.com>
Date: Fri, 4 Jul 2025 15:26:34 +0200
Subject: [PATCH 5/5] fix: add types for plots

---
 mallm/evaluation/plotting/plots.py | 50 ++++++++++++++++--------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/mallm/evaluation/plotting/plots.py b/mallm/evaluation/plotting/plots.py
index 5c0eb2b2..e075ae0f 100644
--- a/mallm/evaluation/plotting/plots.py
+++ b/mallm/evaluation/plotting/plots.py
@@ -2,6 +2,7 @@
 import json
 import os
 from pathlib import Path
+from typing import Optional, Any, Union
 
 import matplotlib.pyplot as plt
 import pandas as pd
@@ -16,16 +17,16 @@
 # Define a beautiful color palette
 COLORS = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8']
 
-def get_colors(n_colors):
+def get_colors(n_colors: int) -> Union[list[str], np.ndarray[Any, Any]]:
     """Generate enough colors for n_colors by cycling or using colormap"""
     if n_colors <= len(COLORS):
         return COLORS[:n_colors]
     else:
         # Use a colormap for more colors
-        return plt.cm.Set3(np.linspace(0, 1, n_colors))
+        return plt.cm.Set3(np.linspace(0, 1, n_colors))  # type: ignore
 
 
-def get_consistent_color_mapping(options):
+def get_consistent_color_mapping(options: list[str]) -> dict[str, Any]:
     """Create consistent color mapping based on option names"""
     # Sort options to ensure consistent assignment
     unique_options = sorted(set(options))
@@ -93,7 +94,7 @@ def aggregate_data(
     return eval_df, stats_df
 
 
-def plot_turns_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
+def plot_turns_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: Optional[dict[str, Any]] = None) -> None:
     """Create a beautiful violin plot for turns distribution"""
     # Filter out rows with missing or invalid turns data
     df = df.dropna(subset=['turns'])
@@ -114,7 +115,7 @@ def plot_turns_with_std(df: pd.DataFrame, input_path: str, global_color_mapping:
     
     # Use global color mapping if provided, otherwise create local one
     if global_color_mapping is None:
-        color_mapping = get_consistent_color_mapping(grouped_data['option'].unique())
+        color_mapping = get_consistent_color_mapping(grouped_data['option'].unique().tolist())
     else:
         color_mapping = global_color_mapping
     
@@ -175,7 +176,7 @@ def plot_turns_with_std(df: pd.DataFrame, input_path: str, global_color_mapping:
     plt.close()
 
 
-def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
+def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: Optional[dict[str, Any]] = None) -> None:
     """Create a beautiful horizontal lollipop chart for clock seconds"""
     grouped = (
         df.groupby(["option", "dataset"])["clockSeconds"]
@@ -187,7 +188,7 @@ def plot_clock_seconds_with_std(df: pd.DataFrame, input_path: str, global_color_
     grouped['label'] = unique_labels
     
     # Sort data: baselines first, then others by shortest time
-    def sort_key(row):
+    def sort_key(row: pd.Series) -> tuple[int, float]:
         option = row['option'].lower()
         if option.startswith('baseline'):
             return (0, row['mean'])  # Baselines first, sorted by time
@@ -206,7 +207,7 @@ def sort_key(row):
     
     # Use global color mapping if provided, otherwise create local one
     if global_color_mapping is None:
-        color_mapping = get_consistent_color_mapping(grouped['option'].unique())
+        color_mapping = get_consistent_color_mapping(grouped['option'].unique().tolist())
     else:
         color_mapping = global_color_mapping
     colors = [color_mapping[option] for option in grouped['option']]
@@ -252,7 +253,7 @@ def sort_key(row):
     plt.close()
 
 
-def plot_decision_success_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
+def plot_decision_success_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: Optional[dict[str, Any]] = None) -> None:
     """Create a beautiful horizontal bar chart for decision success rates"""
     if "decisionSuccess" not in df.columns:
         print(
@@ -283,7 +284,7 @@ def plot_decision_success_with_std(df: pd.DataFrame, input_path: str, global_col
     
     # Use global color mapping if provided, otherwise create local one
     if global_color_mapping is None:
-        color_mapping = get_consistent_color_mapping(grouped['option'].unique())
+        color_mapping = get_consistent_color_mapping(grouped['option'].unique().tolist())
     else:
         color_mapping = global_color_mapping
     colors = [color_mapping[option] for option in grouped['option']]
@@ -364,38 +365,41 @@ def get_unique_labels(df: pd.DataFrame) -> list[str]:
     return unique_labels
 
 
-def get_unique_labels_from_conditions(conditions) -> list[str]:
+def get_unique_labels_from_conditions(conditions: Union[list[str], np.ndarray[Any, Any]]) -> list[str]:
     """Helper function to get unique labels from condition strings"""
     # Convert to list if it's a numpy array
+    condition_list: list[str]
     if hasattr(conditions, 'tolist'):
-        conditions = conditions.tolist()
+        condition_list = conditions.tolist()
+    else:
+        condition_list = conditions
     
-    if len(conditions) == 0:
+    if len(condition_list) == 0:
         return []
 
     # Find the longest common prefix
     common_prefix = ""
-    if len(conditions) > 0:
-        first_condition = conditions[0]
+    if len(condition_list) > 0:
+        first_condition = condition_list[0]
         for i in range(len(first_condition)):
-            if all(condition.startswith(first_condition[:i + 1]) for condition in conditions):
+            if all(condition.startswith(first_condition[:i + 1]) for condition in condition_list):
                 common_prefix = first_condition[:i + 1]
             else:
                 break
 
     # Find the longest common suffix
     common_suffix = ""
-    if len(conditions) > 0:
-        first_condition = conditions[0]
+    if len(condition_list) > 0:
+        first_condition = condition_list[0]
         for i in range(len(first_condition)):
-            if all(condition.endswith(first_condition[-(i + 1):]) for condition in conditions):
+            if all(condition.endswith(first_condition[-(i + 1):]) for condition in condition_list):
                 common_suffix = first_condition[-(i + 1):]
             else:
                 break
 
     # Extract unique parts by removing common prefix and suffix
     unique_labels = []
-    for condition in conditions:
+    for condition in condition_list:
         unique_part = condition
         if common_prefix and condition.startswith(common_prefix):
             unique_part = unique_part[len(common_prefix):]
@@ -406,7 +410,7 @@ def get_unique_labels_from_conditions(conditions) -> list[str]:
     return unique_labels
 
 
-def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: dict = None) -> None:
+def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str, global_color_mapping: Optional[dict[str, Any]] = None) -> None:
     """Create beautiful enhanced bar charts for score distributions"""
     print("Shape of stats_df:", df.shape)
     print("Columns in stats_df:", df.columns)
@@ -445,7 +449,7 @@ def plot_score_distributions_with_std(df: pd.DataFrame, input_path: str, global_
         score_data = grouped[grouped["Score Type"] == score_type].copy()
         
         # Sort data: baselines first, then alphabetically
-        def sort_key(row):
+        def sort_key(row: pd.Series) -> tuple[int, str]:
             option = row['option'].lower()
             if option.startswith('baseline'):
                 return (0, option)  # Baselines first
@@ -465,7 +469,7 @@ def sort_key(row):
         
         # Use global color mapping if provided, otherwise create local one
         if global_color_mapping is None:
-            color_mapping = get_consistent_color_mapping(score_data['option'].unique())
+            color_mapping = get_consistent_color_mapping(score_data['option'].unique().tolist())
         else:
             color_mapping = global_color_mapping
         colors = [color_mapping[option] for option in score_data['option']]