From a4fa8049a8c44b32067c4729feb8970a4fd43fad Mon Sep 17 00:00:00 2001 From: Nistha Mitra Date: Fri, 10 May 2024 14:44:49 -0700 Subject: [PATCH 1/4] "Unit Tests for curriculums" DomainRandomization LearningProgressCurriculum --- syllabus/core/curriculum_base.py | 6 +- syllabus/core/curriculum_sync_wrapper.py | 4 + syllabus/core/environment_sync_wrapper.py | 3 +- syllabus/curricula/test_seeding.py | 90 +++++++++++++++++++++++ 4 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 syllabus/curricula/test_seeding.py diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py index 03284dab..c862480b 100644 --- a/syllabus/core/curriculum_base.py +++ b/syllabus/core/curriculum_base.py @@ -12,8 +12,8 @@ class Curriculum: """Base class and API for defining curricula to interface with Gym environments. """ - - def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None) -> None: + + def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, seed: int = None, task_names: Callable = None) -> None: """Initialize the base Curriculum :param task_space: the environment's task space from which new tasks are sampled @@ -28,6 +28,7 @@ def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_name self.completed_tasks = 0 self.task_names = task_names self.n_updates = 0 + self.seed = seed if self.num_tasks == 0: warnings.warn("Task space is empty. This will cause errors during sampling if no tasks are added.") @@ -194,6 +195,7 @@ def sample(self, k: int = 1) -> Union[List, Any]: # Use list of indices because np.choice does not play nice with tuple tasks # tasks = self.tasks + np.random.seed(self.seed) n_tasks = self.num_tasks task_dist = self._sample_distribution() task_idx = np.random.choice(list(range(n_tasks)), size=k, p=task_dist) diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py index 6e069d8c..fd3f94ac 100644 --- a/syllabus/core/curriculum_sync_wrapper.py +++ b/syllabus/core/curriculum_sync_wrapper.py @@ -29,6 +29,10 @@ def count_tasks(self, task_space=None): def tasks(self): return self.task_space.tasks + @property + def seed(self): + return self.seed + def get_tasks(self, task_space=None): return self.task_space.get_tasks(gym_space=task_space) diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py index c995aa19..c33568b1 100644 --- a/syllabus/core/environment_sync_wrapper.py +++ b/syllabus/core/environment_sync_wrapper.py @@ -39,7 +39,8 @@ def __init__(self, self.task_progress = 0.0 self._batch_step = 0 self.instance_id = components.get_id() - + self.seed = seed + self.episode_length = 0 self.episode_return = 0 diff --git a/syllabus/curricula/test_seeding.py b/syllabus/curricula/test_seeding.py new file mode 100644 index 00000000..b13f389b --- /dev/null +++ b/syllabus/curricula/test_seeding.py @@ -0,0 +1,90 @@ +from syllabus.core import Curriculum +import gymnasium as gym + +from syllabus.task_space import TaskSpace +from domain_randomization import DomainRandomization +from learning_progress import LearningProgressCurriculum +# from simple_box import SimpleBoxCurriculum +# from annealing_box import AnnealingBoxCurriculum +# from syllabus.curricula.plr import CentralizedPrioritizedLevelReplay +# from syllabus.curricula.plr import PrioritizedLevelReplay +# from syllabus.curricula.plr import TaskSampler + +#1: DomainRandomization with seed +task_space = TaskSpace(200) +seed = 3 +c = DomainRandomization(task_space = task_space, seed = seed) +sample = c.sample() +for i in range(5): + next_sample = c.sample() + assert sample == next_sample, f"Expected all samples to be the same, got {sample} and {next_sample}" + sample = next_sample + +print("DomainRandomization with seed! SUCCESSFUL") + +#2: DomainRandomization without seed +task_space = TaskSpace(200) +c = DomainRandomization(task_space = task_space) +sample = c.sample() +for i in range(5): + next_sample = c.sample() + assert sample != next_sample, f"Expected all samples to be different, got {sample} and {next_sample}" + sample = next_sample + +print("DomainRandomization without seed! SUCCESSFUL") + + +#3: LearningProgressCurriculum with seed +task_space = TaskSpace(200) +seed = 5 +c = LearningProgressCurriculum(task_space = task_space, seed = seed) +sample = c.sample() +for i in range(5): + next_sample = c.sample() + assert sample == next_sample, f"Expected all samples to be the same, got {sample} and {next_sample}" + sample = next_sample + +print("LearningProgressCurriculum with seed! SUCCESSFUL") + +#4: LearningProgressCurriculum without seed +task_space = TaskSpace(200) +c = LearningProgressCurriculum(task_space = task_space) +sample = c.sample() +for i in range(5): + next_sample = c.sample() + assert sample != next_sample, f"Expected all samples to be different, got {sample} and {next_sample}" + sample = next_sample + +print("LearningProgressCurriculum without seed! SUCCESSFUL") + +#4: SequentialCurriculum with seed +# task_space = TaskSpace(200) +# c = SequentialCurriculum(task_space = task_space, seed = seed, curriculum_list = list, stopping_conditions = []) +# sample = c.sample() +# for i in range(5): +# next_sample = c.sample() +# assert sample == next_sample, f"Expected all samples to be same, got {sample} and {next_sample}" +# sample = next_sample + +# print("SequentialCurriculum with seed! SUCCESSFUL") + +#5 SimpleBoxCurriculum and AnnealingBoxCurriculum with seed +# task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) +# seed = 3 + +# listb = [SimpleBoxCurriculum(task_space = task_space, seed = seed), +# AnnealingBoxCurriculum(task_space = task_space, seed = seed, start_values = [1,2], end_values = [1,5], total_steps = 1), +# ] + +# sample_list = [listb[0].sample(), listb[1].sample()] +# for i in range(5): +# next_sample_0 = listb[0].sample() +# next_sample_1 = listb[1].sample() + +# assert sample_list[0] == next_sample_0 , f'Expected all samples to be same, got {str(sample_list[0])} and {next_sample_0}' +# assert sample_list[1] == next_sample_1 , f'Expected all samples to be same, got {sample_list[1]} and {next_sample_1}' +# sample_list[0] = next_sample_0 +# sample_list[1] = next_sample_1 + +# print("Its interesting to see that given these arbitrary values the sample returns the same value over 5 iterations") +# Its interesting to see that given these arbitrary values the sample returns the same value over 5 iterations From 935f3e3c551380331657c8c1361fee43f18b0d94 Mon Sep 17 00:00:00 2001 From: Nistha Mitra Date: Tue, 14 May 2024 17:26:09 -0700 Subject: [PATCH 2/4] Unit Test Complete --- syllabus/curricula/plr/central_plr_wrapper.py | 4 +- syllabus/curricula/plr/plr_wrapper.py | 4 +- syllabus/curricula/plr/task_sampler.py | 7 +- syllabus/curricula/sequential.py | 9 +- syllabus/curricula/test_seeding.py | 138 +++++++++--------- 5 files changed, 85 insertions(+), 77 deletions(-) diff --git a/syllabus/curricula/plr/central_plr_wrapper.py b/syllabus/curricula/plr/central_plr_wrapper.py index 7f69ea85..bdf39ba3 100644 --- a/syllabus/curricula/plr/central_plr_wrapper.py +++ b/syllabus/curricula/plr/central_plr_wrapper.py @@ -102,6 +102,7 @@ class CentralizedPrioritizedLevelReplay(Curriculum): def __init__( self, task_space: TaskSpace, + seed : int = None, *curriculum_args, task_sampler_kwargs_dict: dict = None, action_space: gym.Space = None, @@ -117,6 +118,7 @@ def __init__( if task_sampler_kwargs_dict is None: task_sampler_kwargs_dict = {} + self.seed = seed self._strategy = task_sampler_kwargs_dict.get("strategy", None) if not isinstance(task_space.gym_space, Discrete) and not isinstance(task_space.gym_space, MultiDiscrete): raise ValueError( @@ -133,7 +135,7 @@ def __init__( self._gae_lambda = gae_lambda self._supress_usage_warnings = suppress_usage_warnings self._task2index = {task: i for i, task in enumerate(self.tasks)} - self._task_sampler = TaskSampler(self.tasks, action_space=action_space, **task_sampler_kwargs_dict) + self._task_sampler = TaskSampler(self.tasks, action_space=action_space, **task_sampler_kwargs_dict, seed = seed) self._rollouts = RolloutStorage( self._num_steps, self._num_processes, diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py index 9515df4b..92dc071c 100644 --- a/syllabus/curricula/plr/plr_wrapper.py +++ b/syllabus/curricula/plr/plr_wrapper.py @@ -191,6 +191,7 @@ def __init__( self, task_space: TaskSpace, observation_space: gym.Space, + seed: int = None, *curriculum_args, task_sampler_kwargs_dict: dict = None, action_space: gym.Space = None, @@ -225,8 +226,9 @@ def __init__( self._supress_usage_warnings = suppress_usage_warnings self._get_action_log_dist = get_action_log_dist self._task2index = {task: i for i, task in enumerate(self.tasks)} + self.seed = seed - self._task_sampler = TaskSampler(self.tasks, action_space=action_space, **task_sampler_kwargs_dict) + self._task_sampler = TaskSampler(self.tasks, action_space=action_space, **task_sampler_kwargs_dict, seed = self.seed) self._rollouts = RolloutStorage( self._num_steps, self._num_processes, diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py index 15ad4852..3b06eb13 100644 --- a/syllabus/curricula/plr/task_sampler.py +++ b/syllabus/curricula/plr/task_sampler.py @@ -40,6 +40,7 @@ def __init__( staleness_coef: float = 0.1, staleness_transform: str = "power", staleness_temperature: float = 1.0, + seed : int = None ): self.action_space = action_space self.tasks = tasks @@ -62,7 +63,7 @@ def __init__( self.partial_task_scores = np.zeros((num_actors, self.num_tasks), dtype=float) self.partial_task_steps = np.zeros((num_actors, self.num_tasks), dtype=np.int64) self.task_staleness = np.array([0.0] * self.num_tasks, dtype=float) - + self.seed = seed self.next_task_index = 0 # Only used for sequential strategy # Logging metrics @@ -280,6 +281,8 @@ def sample(self, strategy=None): proportion_seen = (self.num_tasks - num_unseen) / self.num_tasks if self.replay_schedule == "fixed": + if self.seed != None : + np.random.seed(self.seed) if proportion_seen >= self.rho: # Sample replay level with fixed prob = 1 - nu OR if all levels seen if np.random.rand() > self.nu or not proportion_seen < 1.0: @@ -289,6 +292,8 @@ def sample(self, strategy=None): return self._sample_unseen_level() elif self.replay_schedule == "proportionate": + if self.seed != None : + np.random.seed(self.seed) if proportion_seen >= self.rho and np.random.rand() < proportion_seen: return self._sample_replay_level() else: diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py index baa12637..7de7ded9 100644 --- a/syllabus/curricula/sequential.py +++ b/syllabus/curricula/sequential.py @@ -12,7 +12,7 @@ class SequentialCurriculum(Curriculum): REQUIRES_EPISODE_UPDATES = True REQUIRES_CENTRAL_UPDATES = False - def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[Any], *curriculum_args, **curriculum_kwargs): + def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[Any], seed : int = None, *curriculum_args, **curriculum_kwargs): super().__init__(*curriculum_args, **curriculum_kwargs) assert len(curriculum_list) > 0, "Must provide at least one curriculum" assert len(stopping_conditions) == len(curriculum_list) - 1, f"Stopping conditions must be one less than the number of curricula. Final curriculum is used for the remainder of training. Expected {len(curriculum_list) - 1}, got {len(stopping_conditions)}." @@ -31,6 +31,7 @@ def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[ self.n_tasks = 0 self.total_tasks = 0 self.episode_returns = [] + self.seed = seed def _parse_curriculum_list(self, curriculum_list: List[Curriculum]) -> List[Curriculum]: """ Parse the curriculum list to ensure that all items are curricula. @@ -39,12 +40,13 @@ def _parse_curriculum_list(self, curriculum_list: List[Curriculum]) -> List[Curr parsed_list = [] for item in curriculum_list: if isinstance(item, Curriculum): + item.set_seed(self.seed) parsed_list.append(item) elif isinstance(item, TaskSpace): - parsed_list.append(DomainRandomization(item)) + parsed_list.append(DomainRandomization(item, seed = self.seed)) elif isinstance(item, list): task_space = TaskSpace(len(item), item) - parsed_list.append(DomainRandomization(task_space)) + parsed_list.append(DomainRandomization(task_space, seed = self.seed)) elif self.task_space.contains(item): parsed_list.append(NoopCurriculum(item, self.task_space)) else: @@ -153,6 +155,7 @@ def sample(self, k: int = 1) -> Union[List, Any]: Choose the next k tasks from the list. """ curriculum = self.current_curriculum + curriculum.set_seed(self.seed) tasks = curriculum.sample(k) # Recode tasks into environment task space diff --git a/syllabus/curricula/test_seeding.py b/syllabus/curricula/test_seeding.py index b13f389b..269d6dd2 100644 --- a/syllabus/curricula/test_seeding.py +++ b/syllabus/curricula/test_seeding.py @@ -4,87 +4,83 @@ from syllabus.task_space import TaskSpace from domain_randomization import DomainRandomization from learning_progress import LearningProgressCurriculum -# from simple_box import SimpleBoxCurriculum -# from annealing_box import AnnealingBoxCurriculum -# from syllabus.curricula.plr import CentralizedPrioritizedLevelReplay -# from syllabus.curricula.plr import PrioritizedLevelReplay -# from syllabus.curricula.plr import TaskSampler - -#1: DomainRandomization with seed +from sequential import SequentialCurriculum +from syllabus.curricula.plr import CentralizedPrioritizedLevelReplay +from syllabus.curricula.plr import PrioritizedLevelReplay +from syllabus.curricula.plr import TaskSampler + +def seed_test(c: Curriculum): + sample = c.sample() + for i in range(5): + next_sample = c.sample() + assert sample == next_sample, f"Expected all samples to be the same, got {sample} and {next_sample}" + sample = next_sample + + return True + +def no_seed_test(c: Curriculum): + sample = c.sample() + for i in range(5): + next_sample = c.sample() + assert sample != next_sample, f"Expected all samples to be different, got {sample} and {next_sample}" + sample = next_sample + + return True + +#Seed Tests task_space = TaskSpace(200) seed = 3 -c = DomainRandomization(task_space = task_space, seed = seed) -sample = c.sample() -for i in range(5): - next_sample = c.sample() - assert sample == next_sample, f"Expected all samples to be the same, got {sample} and {next_sample}" - sample = next_sample -print("DomainRandomization with seed! SUCCESSFUL") +#1: DomainRandomization with seed +c = DomainRandomization(task_space = task_space, seed = seed) +if seed_test(c = c) : + print("DomainRandomization with seed! SUCCESSFUL") #2: DomainRandomization without seed -task_space = TaskSpace(200) c = DomainRandomization(task_space = task_space) -sample = c.sample() -for i in range(5): - next_sample = c.sample() - assert sample != next_sample, f"Expected all samples to be different, got {sample} and {next_sample}" - sample = next_sample - -print("DomainRandomization without seed! SUCCESSFUL") +if no_seed_test(c = c) : + print("DomainRandomization without seed! SUCCESSFUL") #3: LearningProgressCurriculum with seed -task_space = TaskSpace(200) -seed = 5 c = LearningProgressCurriculum(task_space = task_space, seed = seed) -sample = c.sample() -for i in range(5): - next_sample = c.sample() - assert sample == next_sample, f"Expected all samples to be the same, got {sample} and {next_sample}" - sample = next_sample - -print("LearningProgressCurriculum with seed! SUCCESSFUL") +if seed_test(c = c) : + print("LearningProgressCurriculum with seed! SUCCESSFUL") #4: LearningProgressCurriculum without seed -task_space = TaskSpace(200) c = LearningProgressCurriculum(task_space = task_space) -sample = c.sample() -for i in range(5): - next_sample = c.sample() - assert sample != next_sample, f"Expected all samples to be different, got {sample} and {next_sample}" - sample = next_sample - -print("LearningProgressCurriculum without seed! SUCCESSFUL") - -#4: SequentialCurriculum with seed -# task_space = TaskSpace(200) -# c = SequentialCurriculum(task_space = task_space, seed = seed, curriculum_list = list, stopping_conditions = []) -# sample = c.sample() -# for i in range(5): -# next_sample = c.sample() -# assert sample == next_sample, f"Expected all samples to be same, got {sample} and {next_sample}" -# sample = next_sample - -# print("SequentialCurriculum with seed! SUCCESSFUL") - -#5 SimpleBoxCurriculum and AnnealingBoxCurriculum with seed -# task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) -# seed = 3 - -# listb = [SimpleBoxCurriculum(task_space = task_space, seed = seed), -# AnnealingBoxCurriculum(task_space = task_space, seed = seed, start_values = [1,2], end_values = [1,5], total_steps = 1), -# ] - -# sample_list = [listb[0].sample(), listb[1].sample()] -# for i in range(5): -# next_sample_0 = listb[0].sample() -# next_sample_1 = listb[1].sample() - -# assert sample_list[0] == next_sample_0 , f'Expected all samples to be same, got {str(sample_list[0])} and {next_sample_0}' -# assert sample_list[1] == next_sample_1 , f'Expected all samples to be same, got {sample_list[1]} and {next_sample_1}' -# sample_list[0] = next_sample_0 -# sample_list[1] = next_sample_1 - -# print("Its interesting to see that given these arbitrary values the sample returns the same value over 5 iterations") -# Its interesting to see that given these arbitrary values the sample returns the same value over 5 iterations +if no_seed_test(c = c) : + print("LearningProgressCurriculum without seed! SUCCESSFUL") + +#5: SequentialCurriculum with seed +list = [LearningProgressCurriculum(task_space = task_space),DomainRandomization(task_space = task_space) ] +c = SequentialCurriculum(task_space = task_space, curriculum_list = list, stopping_conditions = ["steps>1"], seed = seed) +if seed_test(c = c) : + print("SequentialCurriculum with seed! SUCCESSFUL") + +#6: SequentialCurriculum without seed +list = [LearningProgressCurriculum(task_space = task_space),DomainRandomization(task_space = task_space) ] +c = SequentialCurriculum(task_space = task_space, curriculum_list = list, stopping_conditions = ["steps>1"]) +if no_seed_test(c = c) : + print("SequentialCurriculum without seed! SUCCESSFUL") + +#7 CentralizedPrioritizedLevelReplay with seed +c = CentralizedPrioritizedLevelReplay(task_space = task_space, seed = seed) +if seed_test(c = c) : + print("CentralizedPrioritizedLevelReplay with seed! SUCCESSFUL") + +#8 CentralizedPrioritizedLevelReplay without seed +c = CentralizedPrioritizedLevelReplay(task_space = task_space) +if no_seed_test(c = c) : + print("CentralizedPrioritizedLevelReplay without seed! SUCCESSFUL") + +#9 PrioritizedLevelReplay with seed +c = PrioritizedLevelReplay(task_space = task_space, observation_space = gym.spaces.Discrete(3), seed = seed) +if seed_test(c = c) : + print("PrioritizedLevelReplay with seed! SUCCESSFUL") + +#10 PrioritizedLevelReplay without seed +c = PrioritizedLevelReplay(task_space = task_space, observation_space = gym.spaces.Discrete(3)) +if no_seed_test(c = c) : + print("PrioritizedLevelReplay without seed! SUCCESSFUL") + From dccf56198edeafca57cba49c9c2209f8d505529b Mon Sep 17 00:00:00 2001 From: Nistha Mitra Date: Sat, 29 Jun 2024 11:19:37 -0700 Subject: [PATCH 3/4] tested seeding on exp --- syllabus/core/curriculum_base.py | 6 ++++++ syllabus/examples/training_scripts/cleanrl_procgen_plr.py | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py index c862480b..7a3b63d1 100644 --- a/syllabus/core/curriculum_base.py +++ b/syllabus/core/curriculum_base.py @@ -65,6 +65,12 @@ def tasks(self) -> List[tuple]: """ return list(self.task_space.tasks) + def set_seed(self, seed: int = None) -> None: + self.seed = seed + if(seed!=None) : + np.random.seed(seed) + + def add_task(self, task: typing.Any) -> None: # TODO raise NotImplementedError("This curriculum does not support adding tasks after initialization.") diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py index e13c22ed..73511911 100644 --- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py @@ -311,7 +311,7 @@ def get_value(obs): # env setup print("Creating env") - envs = gym.vector.AsyncVectorEnv( + envs = gym.vector.SyncVectorEnv( [ make_env( args.env_id, @@ -324,7 +324,7 @@ def get_value(obs): ) envs = wrap_vecenv(envs) - test_eval_envs = gym.vector.AsyncVectorEnv( + test_eval_envs = gym.vector.SyncVectorEnv( [ make_env(args.env_id, args.seed + i, num_levels=0) for i in range(args.num_eval_episodes) @@ -332,7 +332,7 @@ def get_value(obs): ) test_eval_envs = wrap_vecenv(test_eval_envs) - train_eval_envs = gym.vector.AsyncVectorEnv( + train_eval_envs = gym.vector.SyncVectorEnv( [ make_env(args.env_id, args.seed + i, num_levels=200) for i in range(args.num_eval_episodes) @@ -368,6 +368,7 @@ def get_value(obs): episode_rewards = deque(maxlen=10) completed_episodes = 0 + for update in range(1, num_updates + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: From 0bf21885e2e660275963932bec8cb4a85c3fe316 Mon Sep 17 00:00:00 2001 From: Nistha Mitra Date: Tue, 2 Jul 2024 00:33:46 -0700 Subject: [PATCH 4/4] Add sample weights as an accepted value for curriculum. --- syllabus/core/curriculum_base.py | 8 ++++++-- syllabus/curricula/test_seeding.py | 26 +++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py index 7a3b63d1..79a2c79e 100644 --- a/syllabus/core/curriculum_base.py +++ b/syllabus/core/curriculum_base.py @@ -13,7 +13,7 @@ class Curriculum: """Base class and API for defining curricula to interface with Gym environments. """ - def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, seed: int = None, task_names: Callable = None) -> None: + def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, seed: int = None, sample_weights: list = None, task_names: Callable = None) -> None: """Initialize the base Curriculum :param task_space: the environment's task space from which new tasks are sampled @@ -29,6 +29,7 @@ def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, seed: int self.task_names = task_names self.n_updates = 0 self.seed = seed + self.sample_weights = sample_weights if self.num_tasks == 0: warnings.warn("Task space is empty. This will cause errors during sampling if no tasks are added.") @@ -203,7 +204,10 @@ def sample(self, k: int = 1) -> Union[List, Any]: # tasks = self.tasks np.random.seed(self.seed) n_tasks = self.num_tasks - task_dist = self._sample_distribution() + if self.sample_weights == None: + task_dist = self._sample_distribution() + else : + task_dist = self.sample_weights task_idx = np.random.choice(list(range(n_tasks)), size=k, p=task_dist) return task_idx diff --git a/syllabus/curricula/test_seeding.py b/syllabus/curricula/test_seeding.py index 269d6dd2..46f9c89a 100644 --- a/syllabus/curricula/test_seeding.py +++ b/syllabus/curricula/test_seeding.py @@ -8,11 +8,13 @@ from syllabus.curricula.plr import CentralizedPrioritizedLevelReplay from syllabus.curricula.plr import PrioritizedLevelReplay from syllabus.curricula.plr import TaskSampler +import numpy as np def seed_test(c: Curriculum): sample = c.sample() for i in range(5): next_sample = c.sample() + print(next_sample) assert sample == next_sample, f"Expected all samples to be the same, got {sample} and {next_sample}" sample = next_sample @@ -20,15 +22,21 @@ def seed_test(c: Curriculum): def no_seed_test(c: Curriculum): sample = c.sample() + list = [int(sample[0])] for i in range(5): next_sample = c.sample() - assert sample != next_sample, f"Expected all samples to be different, got {sample} and {next_sample}" + list.append(int(next_sample[0])) + print(next_sample) sample = next_sample - return True - + if(len(set(list))<=1) : + raise Exception(f"Expected samples to variable, only one sample value {sample}") + else : + return True + #Seed Tests task_space = TaskSpace(200) +# task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) seed = 3 #1: DomainRandomization with seed @@ -84,3 +92,15 @@ def no_seed_test(c: Curriculum): if no_seed_test(c = c) : print("PrioritizedLevelReplay without seed! SUCCESSFUL") +#11 DomainRandomization with seed with sample_weights +space = TaskSpace(gym.spaces.Discrete(4), ["a", "b", "c","d"]) +c = DomainRandomization(task_space = space, seed = seed, sample_weights = [0.6,0.2,0.1,0.1]) +if seed_test(c = c) : + print("DomainRandomization with seed with sample weights! SUCCESSFUL") + +#2: DomainRandomization without seed +c = DomainRandomization(task_space = space, sample_weights = [0.3,0.2,0.4,0.1]) +if no_seed_test(c = c) : + print("DomainRandomization without seed with sample weights! SUCCESSFUL") + +