From 1ef8655af001ea9df1a19ac8adc9e33551b545ef Mon Sep 17 00:00:00 2001 From: Joshua Evans Date: Thu, 13 Feb 2025 15:42:30 +0000 Subject: [PATCH 1/3] Added seeding to discrete and function-approximated environments. --- simpleoptions/environment.py | 11 +++++++++++ .../function_approximation/environment.py | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/simpleoptions/environment.py b/simpleoptions/environment.py index c010b5d..914c67e 100644 --- a/simpleoptions/environment.py +++ b/simpleoptions/environment.py @@ -1,6 +1,7 @@ import copy import random +import numpy as np import networkx as nx from typing import List, Set @@ -75,6 +76,16 @@ def render(self, mode: str = "human") -> None: """ pass + def seed(self, random_seed: int) -> None: + """ + Seed the environment's random number generator(s). + + Args: + random_seed (int): The random seed to use for random number generation. + """ + random.seed(random_seed) + np.random.seed(random_seed) + @abstractmethod def close(self): """ diff --git a/simpleoptions/function_approximation/environment.py b/simpleoptions/function_approximation/environment.py index 2998027..2b36e34 100644 --- a/simpleoptions/function_approximation/environment.py +++ b/simpleoptions/function_approximation/environment.py @@ -1,3 +1,5 @@ +import random + import numpy as np import gymnasium as gym @@ -59,6 +61,16 @@ def render(self) -> None: """ pass + def seed(self, random_seed: int) -> None: + """ + Seed the environment's random number generator(s). + + Args: + random_seed (int): The random seed to use for random number generation. + """ + random.seed(random_seed) + np.random.seed(random_seed) + @abstractmethod def close(self) -> None: """ @@ -218,6 +230,9 @@ def step(self, action: Hashable, state: Hashable = None) -> Tuple[Hashable, floa def render(self) -> None: return self.env.render() + def seed(self, random_seed: int) -> None: + return self.env.seed(random_seed) + def close(self) -> None: return self.env.close() From 93b2ab18a896800e4e2bcfd5ecf1be9a7d7a0ab9 Mon Sep 17 00:00:00 2001 From: Joshua Evans Date: Thu, 13 Feb 2025 15:56:01 +0000 Subject: [PATCH 2/3] Ruff Formatting --- example/small_rooms_env.py | 7 ++++++- .../function_approximation/__init__.py | 5 ++++- simpleoptions/implementations/__init__.py | 19 +++++++++++++++---- simpleoptions/implementations/betweenness.py | 10 ++++++++-- .../implementations/diffusion_options.py | 8 +++++++- .../subgoal_option_generator.py | 8 +++++++- simpleoptions/options_agent.py | 14 +++++++++----- 7 files changed, 56 insertions(+), 15 deletions(-) diff --git a/example/small_rooms_env.py b/example/small_rooms_env.py index 880e9aa..162b933 100644 --- a/example/small_rooms_env.py +++ b/example/small_rooms_env.py @@ -49,7 +49,12 @@ def step(self, action): reward += 10 # Return (next_state, reward, terminal, info). - return self.current_state, reward, self.is_state_terminal(self.current_state), {} + return ( + self.current_state, + reward, + self.is_state_terminal(self.current_state), + {}, + ) def get_action_space(self): # The agent has four actions (up, down, left, right). diff --git a/simpleoptions/function_approximation/__init__.py b/simpleoptions/function_approximation/__init__.py index 0114426..8fcf1e2 100644 --- a/simpleoptions/function_approximation/__init__.py +++ b/simpleoptions/function_approximation/__init__.py @@ -1,6 +1,9 @@ import sys -from simpleoptions.function_approximation.environment import ApproxBaseEnvironment, GymWrapper +from simpleoptions.function_approximation.environment import ( + ApproxBaseEnvironment, + GymWrapper, +) from simpleoptions.function_approximation.primitive_option import PrimitiveOption __all__ = ["ApproxBaseEnvironment", "GymWrapper", "PrimitiveOption"] diff --git a/simpleoptions/implementations/__init__.py b/simpleoptions/implementations/__init__.py index ca8b900..02499d7 100644 --- a/simpleoptions/implementations/__init__.py +++ b/simpleoptions/implementations/__init__.py @@ -1,11 +1,22 @@ # Generic Option Generators. -from simpleoptions.implementations.generic_option_generator import GenericOptionGenerator -from simpleoptions.implementations.subgoal_option_generator import SubgoalOptionGenerator, SubgoalOption +from simpleoptions.implementations.generic_option_generator import ( + GenericOptionGenerator, +) +from simpleoptions.implementations.subgoal_option_generator import ( + SubgoalOptionGenerator, + SubgoalOption, +) # Skill Discovery Algorithm Implementations. from simpleoptions.implementations.eigenoptions import EigenoptionGenerator, Eigenoption -from simpleoptions.implementations.diffusion_options import DiffusionOptionGenerator, DiffusionOption -from simpleoptions.implementations.betweenness import BetweennessOptionGenerator, BetweennessOption +from simpleoptions.implementations.diffusion_options import ( + DiffusionOptionGenerator, + DiffusionOption, +) +from simpleoptions.implementations.betweenness import ( + BetweennessOptionGenerator, + BetweennessOption, +) __all__ = [ diff --git a/simpleoptions/implementations/betweenness.py b/simpleoptions/implementations/betweenness.py index ece1b6a..e837d4a 100644 --- a/simpleoptions/implementations/betweenness.py +++ b/simpleoptions/implementations/betweenness.py @@ -57,10 +57,16 @@ def generate_options( # Define options for reaching each subgoal. options = [None for _ in range(len(subgoals))] for i, subgoal in tqdm(enumerate(subgoals), desc="Training Betweeness Options..."): - initiation_set = sorted(list(nx.single_target_shortest_path_length(stg, subgoal)), key=lambda x: x[1]) + initiation_set = sorted( + list(nx.single_target_shortest_path_length(stg, subgoal)), + key=lambda x: x[1], + ) initiation_set = list(list(zip(*initiation_set))[0])[1 : self.initiation_set_size + 1] options[i] = BetweennessOption( - env=env, subgoal=subgoal, initiation_set=set(initiation_set), betweenness=centralities[subgoal] + env=env, + subgoal=subgoal, + initiation_set=set(initiation_set), + betweenness=centralities[subgoal], ) self.train_option(options[i]) diff --git a/simpleoptions/implementations/diffusion_options.py b/simpleoptions/implementations/diffusion_options.py index 5cef62d..937d3dd 100644 --- a/simpleoptions/implementations/diffusion_options.py +++ b/simpleoptions/implementations/diffusion_options.py @@ -166,7 +166,13 @@ def _is_local_maxima(self, node: Hashable, stg: nx.Graph, centralities: Dict): class DiffusionOption(SubgoalOption): - def __init__(self, env: BaseEnvironment, subgoal: Hashable, initiation_set: Set[Hashable], q_table: Dict = None): + def __init__( + self, + env: BaseEnvironment, + subgoal: Hashable, + initiation_set: Set[Hashable], + q_table: Dict = None, + ): super().__init__(env, subgoal, initiation_set, q_table) def __str__(self): diff --git a/simpleoptions/implementations/subgoal_option_generator.py b/simpleoptions/implementations/subgoal_option_generator.py index 7cd8f24..f79ff84 100644 --- a/simpleoptions/implementations/subgoal_option_generator.py +++ b/simpleoptions/implementations/subgoal_option_generator.py @@ -110,7 +110,13 @@ def _select_action(self, state: Hashable, option: "SubgoalOption", q_table: Dict class SubgoalOption(BaseOption): - def __init__(self, env: BaseEnvironment, subgoal: Hashable, initiation_set: Set[Hashable], q_table: Dict = None): + def __init__( + self, + env: BaseEnvironment, + subgoal: Hashable, + initiation_set: Set[Hashable], + q_table: Dict = None, + ): self.env = copy.copy(env) self.subgoal = subgoal self.initiation_set = initiation_set diff --git a/simpleoptions/options_agent.py b/simpleoptions/options_agent.py index efd9c83..cdeccfb 100644 --- a/simpleoptions/options_agent.py +++ b/simpleoptions/options_agent.py @@ -415,7 +415,11 @@ def run_agent( if not epoch_eval and episodic_eval: return self.training_log, self.episodic_evaluation_log if epoch_eval and episodic_eval: - return self.training_log, self.epoch_evaluation_log, self.episodic_evaluation_log + return ( + self.training_log, + self.epoch_evaluation_log, + self.episodic_evaluation_log, + ) else: training_epoch_rewards = [ @@ -488,11 +492,11 @@ def test_policy( } for key, value in transition.items(): if not episodic_eval: - self.epoch_evaluation_log[f"evaluation_{eval_number}"][f"run_{test_run+1}"][key].append( - value - ) + self.epoch_evaluation_log[f"evaluation_{eval_number}"][f"run_{test_run + 1}"][ + key + ].append(value) else: - self.episodic_evaluation_log[f"evaluation_{eval_number}"][f"run_{test_run+1}"][ + self.episodic_evaluation_log[f"evaluation_{eval_number}"][f"run_{test_run + 1}"][ key ].append(value) # Reset environment and continue evaluation run. From 26e914bd8b44c03e93c9a9b09e2e872901e42baa Mon Sep 17 00:00:00 2001 From: Joshua Evans Date: Thu, 13 Feb 2025 15:57:02 +0000 Subject: [PATCH 3/3] Bump version 0.10.0 -> 0.11.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cfe189c..a2d43b5 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="simpleoptions", - version="0.10.0", + version="0.11.0", author="Joshua Evans", author_email="jbe25@bath.ac.uk", description="A simple and flexible framework for working with Options in Reinforcement Learning.",