RyanNavillus · AdrianHuang2002 · Mar 31, 2024 · Apr 16, 2024 · Apr 17, 2024 · Apr 17, 2024
diff --git a/.DS_Store b/.DS_Store
diff --git a/profiling_results.prof b/profiling_results.prof
diff --git a/syllabus/.DS_Store b/syllabus/.DS_Store
diff --git a/syllabus/core/.DS_Store b/syllabus/core/.DS_Store
diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py
@@ -3,34 +3,44 @@
 from typing import Any, Callable, List, Tuple, Union
 
 import numpy as np
-from gymnasium.spaces import Dict
-
+from gymnasium.spaces import Dict, Box
+import random
 from syllabus.task_space import TaskSpace
+from itertools import product
 
 
 # TODO: Move non-generic logic to Uniform class. Allow subclasses to call super for generic error handling
 class Curriculum:
     """Base class and API for defining curricula to interface with Gym environments.
     """
 
-    def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None) -> None:
+    def __init__(self, task_space: TaskSpace, task_names: Callable = None, warmup_strategy: str = None, warmup_samples: int = 0) -> None:
         """Initialize the base Curriculum
 
         :param task_space: the environment's task space from which new tasks are sampled
         TODO: Implement this in a way that works with any curriculum, maybe as a wrapper
-        :param random_start_tasks: Number of uniform random tasks to sample before using the algorithm's sample method, defaults to 0
         TODO: Use task space for this
         :param task_names: Names of the tasks in the task space, defaults to None
         """
         assert isinstance(task_space, TaskSpace), f"task_space must be a TaskSpace object. Got {type(task_space)} instead."
         self.task_space = task_space
-        self.random_start_tasks = random_start_tasks
         self.completed_tasks = 0
         self.task_names = task_names
         self.n_updates = 0
-
-        if self.num_tasks == 0:
+        self.startup_sampled_tasks = 0  # number of tasks requested from warmup sampling so far
+        self.warmup_strategy = warmup_strategy
+        self.warmup_tasks = warmup_samples
+        self.fix_curr_index = 0  # cursor used by the "fix" warmup strategy
+        # The "fix" strategy on a Box space uses a grid of points that is precomputed once here.
+        if warmup_strategy == "fix" and isinstance(self.task_space.gym_space, Box):
+            self.fix_box_space = self._initialize_fixed_grid()
+        # num_tasks is None for a continuous space, so test that first to keep the size comparisons below safe.
+        if self.num_tasks is None:
+            warnings.warn("Task space is continuous. Number of warmup tasks can't be compared to the task space size.")
+        elif self.num_tasks == 0:
             warnings.warn("Task space is empty. This will cause errors during sampling if no tasks are added.")
+        elif warmup_samples > self.num_tasks:
+            warnings.warn("Number of warmup tasks is larger than task space, some tasks will be replayed during warmup.")
 
     @property
     def requires_step_updates(self) -> bool:
@@ -172,14 +182,47 @@ def _sample_distribution(self) -> List[float]:
         Any curriculum that maintains a true probability distribution should implement this method to retrieve it.
         """
         raise NotImplementedError
-
-    def _should_use_startup_sampling(self) -> bool:
-        return self.random_start_tasks > 0 and self.completed_tasks < self.random_start_tasks
-
-    def _startup_sample(self) -> List:
-        task_dist = [0.0 / self.num_tasks for _ in range(self.num_tasks)]
-        task_dist[0] = 1.0
-        return task_dist
+
+    def _initialize_fixed_grid(self):
+        """Build the evenly spaced grid of Box tasks used by the "fix" warmup strategy.
+
+        :return: list of tuples, one per grid point, roughly ``warmup_tasks`` points in total
+        """
+        box = self.task_space.gym_space
+        per_axis = int(round(self.warmup_tasks ** (1 / box.shape[0])))
+        axes = [np.linspace(lo, hi, per_axis) for lo, hi in zip(box.low, box.high)]
+        return [tuple(point) for point in product(*axes)]
+
+    def _should_use_startup_sampling(self) -> bool:  # True while warmup is enabled and not yet finished
+        return self.warmup_strategy not in (None, "none") and self.startup_sampled_tasks < self.warmup_tasks
+
+    def _startup_sample(self, k: int) -> List:
+        """Return k warmup tasks chosen according to ``self.warmup_strategy``.
+
+        "fix" steps cyclically through the task list (or the precomputed grid for a Box space) and
+        "random" draws independent samples. Any other strategy returns an empty list.
+
+        :param k: number of tasks to return
+        :return: list of sampled tasks
+        """
+        sampled_tasks = []
+        is_box = isinstance(self.task_space.gym_space, Box)
+
+        if self.warmup_strategy == "fix":
+            candidates = self.fix_box_space if is_box else self.tasks
+            n_candidates = len(candidates)
+            if n_candidates > 0:
+                # Index modulo the pool size so any k wraps around correctly, and build a fresh list so
+                # callers that extend the result never mutate the stored grid or task list.
+                sampled_tasks = [candidates[(self.fix_curr_index + i) % n_candidates] for i in range(k)]
+                self.fix_curr_index = (self.fix_curr_index + k) % n_candidates
+        elif self.warmup_strategy == "random" and is_box:
+            sampled_tasks = [self.task_space.gym_space.sample() for _ in range(k)]
+        elif self.warmup_strategy == "random":
+            sampled_tasks = random.choices(self.tasks, k=k)  # with replacement: duplicates are possible
+
+        self.startup_sampled_tasks += k
+        return sampled_tasks
 
     def sample(self, k: int = 1) -> Union[List, Any]:
         """Sample k tasks from the curriculum.
@@ -190,14 +233,20 @@ def sample(self, k: int = 1) -> Union[List, Any]:
         assert self.num_tasks > 0, "Task space is empty. Please add tasks to the curriculum before sampling."
 
         if self._should_use_startup_sampling():
-            return self._startup_sample()
-
-        # Use list of indices because np.choice does not play nice with tuple tasks
-        # tasks = self.tasks
-        n_tasks = self.num_tasks
+            tasks = self._startup_sample(k)
+            # Check if the startup sampling has satisfied the request or if there's no progress (no tasks returned)
+            if len(tasks) > 0 and len(tasks) < k:  # Check if we need to add more tasks
+                additional_tasks = self.sample(k=k-len(tasks))
+                tasks.extend(additional_tasks) 
+            return tasks
+
         task_dist = self._sample_distribution()
-        task_idx = np.random.choice(list(range(n_tasks)), size=k, p=task_dist)
-        return task_idx
+
+        # Normal sampling process
+        tasks = self.tasks
+        n_tasks = len(tasks)
+        task_idx = np.random.choice(range(n_tasks), size=k, p=task_dist)
+        return [tasks[i] for i in task_idx]
 
     def log_metrics(self, writer, step=None, log_full_dist=False):
         """Log the task distribution to the provided tensorboard writer.

diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py
@@ -78,10 +78,13 @@ def reset(self, *args, **kwargs):
             added_tasks = message["added_tasks"]
             for add_task in added_tasks:
                 self.env.add_task(add_task)
-        return self.env.reset(*args, new_task=next_task, **kwargs)
+        obs, info = self.env.reset(*args, new_task=next_task, **kwargs)
+        info["task"] = self.task_space.encode(self.get_task())
+        return obs, info
 
     def step(self, action):
         obs, rew, term, trunc, info = step_api_compatibility(self.env.step(action), output_truncation_bool=True)
+        info["task"] = self.task_space.encode(self.get_task())
         self.episode_length += 1
         self.episode_return += rew
         self.task_progress = info.get("task_completion", 0.0)
@@ -339,4 +342,4 @@ def add_task(self, task):
     def __getattr__(self, attr):
         env_attr = getattr(self.env, attr, None)
         if env_attr:
-            return env_attr
+            return env_attr
diff --git a/syllabus/curricula/.DS_Store b/syllabus/curricula/.DS_Store
diff --git a/syllabus/curricula/__init__.py b/syllabus/curricula/__init__.py
@@ -6,6 +6,6 @@
 from .plr.central_plr_wrapper import CentralizedPrioritizedLevelReplay
 from .plr.plr_wrapper import PrioritizedLevelReplay
 from .plr.task_sampler import TaskSampler
-from .sequential import SequentialCurriculum
+from .sequential import SequentialCurriculum, Condition
 from .simple_box import SimpleBoxCurriculum
 from .annealing_box import AnnealingBoxCurriculum
diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py
@@ -48,6 +48,9 @@ def sample(self, k: int = 1) -> Union[List, Any]:
         Sample k tasks from the curriculum.
         """
         # Linear annealing from start_values to end_values
+        if self._should_use_startup_sampling():
+            return self._startup_sample(k)
+
         annealed_values = (
                 self.start_values + (self.end_values - self.start_values) *
                 np.minimum(self.current_step, self.total_steps) / self.total_steps

diff --git a/syllabus/curricula/plr/central_plr_wrapper.py b/syllabus/curricula/plr/central_plr_wrapper.py
@@ -215,9 +215,9 @@ def _sample_distribution(self) -> List[float]:
     def sample(self, k: int = 1) -> Union[List, Any]:
         self.num_samples += 1
         if self._should_use_startup_sampling():
-            return self._startup_sample()
-        else:
-            return [self._task_sampler.sample() for _ in range(k)]
+            return self._startup_sample(k)
+        # Warmup is over (or disabled): draw all k tasks from self._task_sampler.
+        return [self._task_sampler.sample() for _ in range(k)]
 
     def _enumerate_tasks(self, space):
         assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete"

diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py
@@ -248,9 +248,9 @@ def _sample_distribution(self) -> List[float]:
 
     def sample(self, k: int = 1) -> Union[List, Any]:
         if self._should_use_startup_sampling():
-            return self._startup_sample()
-        else:
-            return [self._task_sampler.sample() for _ in range(k)]
+            return self._startup_sample(k)
+        # Warmup is over (or disabled): draw all k tasks from self._task_sampler.
+        return [self._task_sampler.sample() for _ in range(k)]
 
     def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None:
         """

diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py
@@ -310,6 +310,8 @@ def sample_weights(self):
                 self.staleness_temperature,
                 self.task_staleness,
             )
+            if np.isclose(np.sum(staleness_weights), 0):
+                staleness_weights = np.ones_like(staleness_weights, dtype=float) / len(staleness_weights)
             staleness_weights = staleness_weights * (1 - self.unseen_task_weights)
             z = np.sum(staleness_weights)
             if z > 0: