Skip to content
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
4b5363a
pass single process test
Mar 31, 2024
ce3c005
Your commit message
Apr 16, 2024
d97f8fc
did some changes
Apr 17, 2024
9ec5728
fixed the issues
Apr 17, 2024
2cf206c
Applied changes from diff and resolved conflicts
Apr 18, 2024
241636f
sb3
AdrianHuang2002 Apr 27, 2024
e3391c2
refine sb3
AdrianHuang2002 Apr 29, 2024
d2f1450
refine sb3
AdrianHuang2002 Apr 29, 2024
e402274
Model Architecture
AdrianHuang2002 Apr 29, 2024
b061c00
Model Architecture and sb3_procgen_plr refine
AdrianHuang2002 Apr 29, 2024
9d30b18
Model Architecture and sb3_procgen_plr refine
AdrianHuang2002 Apr 29, 2024
5700585
Add files via upload
AdrianHuang2002 Apr 29, 2024
b660323
Model Architecture modify
AdrianHuang2002 Apr 30, 2024
9f903dc
Merge branch 'sb3-progen-plr' of github.com:AdrianHuang2002/Syllabus …
AdrianHuang2002 Apr 30, 2024
37d29c3
Model Architecture modify
AdrianHuang2002 Apr 30, 2024
906a55b
Model Architecture modify
AdrianHuang2002 May 3, 2024
979876e
Add SB3 Agent code
RyanNavillus May 6, 2024
e8bfba0
Model Architecture completed version
AdrianHuang2002 May 8, 2024
d3ce33d
changed init_weights method
AdrianHuang2002 May 8, 2024
27b80aa
changed init_weights method
AdrianHuang2002 May 9, 2024
e85eaff
changed init_weights method
AdrianHuang2002 May 9, 2024
48ca132
value_net weight update
AdrianHuang2002 May 10, 2024
3330f15
init_weights update and change in CustomCallback
AdrianHuang2002 May 12, 2024
1bb55b9
init_weights update and change in CustomCallback _on_step function
AdrianHuang2002 May 14, 2024
02daa40
init_weights update and change in CustomCallback _on_step function
AdrianHuang2002 May 14, 2024
3a93293
changes in CustomCallback _on_step function
AdrianHuang2002 May 18, 2024
c8b5ae6
Merge branch 'main' into sb3-progen-plr
RyanNavillus May 18, 2024
143d59f
Testing eval changes
RyanNavillus May 18, 2024
d899f04
Fix tasks for PLR update
RyanNavillus May 18, 2024
e13b8e4
sb3-procgen-plr final version
AdrianHuang2002 May 20, 2024
0712d48
Update curriculum_base.py
May 31, 2024
c4de462
changes in SequentialCurriculum
AdrianHuang2002 Jul 6, 2024
11f1c5b
a bit reduce to the overlapped part for condition ‘call’ function and…
AdrianHuang2002 Jul 9, 2024
04066da
Local changes before merge
AdrianHuang2002 Jul 17, 2024
acb5097
modification for custom_metric
AdrianHuang2002 Jul 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
Binary file added profiling_results.prof
Binary file not shown.
Binary file added syllabus/.DS_Store
Binary file not shown.
Binary file added syllabus/core/.DS_Store
Binary file not shown.
93 changes: 71 additions & 22 deletions syllabus/core/curriculum_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,44 @@
from typing import Any, Callable, List, Tuple, Union

import numpy as np
from gymnasium.spaces import Dict

from gymnasium.spaces import Dict, Box
import random
from syllabus.task_space import TaskSpace
from itertools import product


# TODO: Move non-generic logic to Uniform class. Allow subclasses to call super for generic error handling
class Curriculum:
"""Base class and API for defining curricula to interface with Gym environments.
"""

def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None) -> None:
def __init__(self, task_space: TaskSpace, task_names: Callable = None, warmup_strategy: str = None, warmup_samples: int = 0) -> None:
"""Initialize the base Curriculum

:param task_space: the environment's task space from which new tasks are sampled
TODO: Implement this in a way that works with any curriculum, maybe as a wrapper
:param random_start_tasks: Number of uniform random tasks to sample before using the algorithm's sample method, defaults to 0
TODO: Use task space for this
:param task_names: Names of the tasks in the task space, defaults to None
"""
assert isinstance(task_space, TaskSpace), f"task_space must be a TaskSpace object. Got {type(task_space)} instead."
self.task_space = task_space
self.random_start_tasks = random_start_tasks
self.completed_tasks = 0
self.task_names = task_names
self.n_updates = 0

if self.num_tasks == 0:
self.startup_sampled_tasks = 0
self.warmup_strategy = warmup_strategy
self.warmup_tasks = warmup_samples
self.fix_curr_index = 0

if warmup_strategy == "fix" and isinstance(self.task_space.gym_space, Box):
self.fix_box_space = self._initialize_fixed_grid()

if self.num_tasks is None:
warnings.warn("Task space is continuous. Number of warmup tasks can't be compared to the task space size.")
elif self.num_tasks == 0:
warnings.warn("Task space is empty. This will cause errors during sampling if no tasks are added.")
elif warmup_samples > self.num_tasks:
warnings.warn("Number of warmup tasks is larger than task space, some tasks will be replayed during warmup.")

@property
def requires_step_updates(self) -> bool:
Expand Down Expand Up @@ -172,14 +182,47 @@ def _sample_distribution(self) -> List[float]:
Any curriculum that maintains a true probability distribution should implement this method to retrieve it.
"""
raise NotImplementedError

def _should_use_startup_sampling(self) -> bool:
return self.random_start_tasks > 0 and self.completed_tasks < self.random_start_tasks

def _startup_sample(self) -> List:
task_dist = [0.0 / self.num_tasks for _ in range(self.num_tasks)]
task_dist[0] = 1.0
return task_dist

def _initialize_fixed_grid(self):
    """Build the fixed grid of warmup tasks for a Box task space.

    Evenly spaced values (``np.linspace``, endpoints included) are placed
    between ``low[i]`` and ``high[i]`` for each of the ``shape[0]`` dimensions
    of ``self.task_space.gym_space``; the grid is the Cartesian product of
    those per-dimension values.

    The number of values per dimension is the rounded ``dims``-th root of
    ``self.warmup_tasks``, so the grid holds roughly (not exactly)
    ``warmup_tasks`` points. It is clamped to at least one value per dimension
    so the grid is never empty, even when ``warmup_tasks`` is 0.

    :return: List of grid points, each a tuple with one coordinate per dimension.
    """
    space = self.task_space.gym_space
    # NOTE(review): only the first axis of the shape is used, so this assumes
    # a 1-D Box — confirm before using with multi-dimensional Box spaces.
    dims = space.shape[0]
    samples_per_dim = max(1, int(round(self.warmup_tasks ** (1 / dims))))
    axes = [np.linspace(space.low[i], space.high[i], samples_per_dim) for i in range(dims)]
    # itertools.product already yields tuples, so no per-point conversion is needed.
    return list(product(*axes))

def _should_use_startup_sampling(self) -> bool:
    """Return True while warmup sampling should still be used.

    Warmup is active only when a warmup strategy is configured and fewer than
    ``self.warmup_tasks`` tasks have been drawn through startup sampling so
    far (``self.startup_sampled_tasks``).

    Both ``None`` and the string ``"none"`` mean "no warmup". Without the
    ``None`` check, an unset strategy combined with a positive warmup budget
    would enter warmup with no strategy to produce tasks.
    """
    if self.warmup_strategy is None or self.warmup_strategy == "none":
        return False
    return self.startup_sampled_tasks < self.warmup_tasks

def _startup_sample(self, k: int) -> List:
    """Draw ``k`` warmup tasks according to ``self.warmup_strategy``.

    Strategies:

    * ``"fix"``: walk through a fixed sequence in order, wrapping around as
      often as needed. For a Box task space the sequence is the precomputed
      grid ``self.fix_box_space``; otherwise it is ``self.tasks``.
      ``self.fix_curr_index`` remembers where the next call continues.
    * ``"random"``: for a Box task space, call the space's ``sample()`` ``k``
      times; otherwise pick ``k`` entries of ``self.tasks`` uniformly with
      replacement.

    Any other strategy yields an empty list. ``self.startup_sampled_tasks`` is
    advanced by ``k`` in every case so the warmup phase always terminates.

    :param k: Number of tasks requested.
    :return: A new list of sampled tasks; internal task lists are never
        returned by reference, so callers may safely extend the result.
    """
    space = self.task_space.gym_space
    sampled_tasks = []

    if isinstance(space, Box):
        if self.warmup_strategy == "fix":
            grid = self.fix_box_space
            if grid:
                start = self.fix_curr_index % len(grid)
                # Build a fresh list of exactly k points, cycling through the grid.
                sampled_tasks = [grid[(start + offset) % len(grid)] for offset in range(k)]
                self.fix_curr_index = (start + k) % len(grid)
        elif self.warmup_strategy == "random":
            sampled_tasks = [space.sample() for _ in range(k)]
    else:
        if self.warmup_strategy == "fix":
            # presumably self.tasks is an indexable sequence of length num_tasks — TODO confirm
            tasks = self.tasks
            n_tasks = self.num_tasks
            if n_tasks:
                start = self.fix_curr_index % n_tasks
                # Modular indexing handles any k, including k > num_tasks.
                sampled_tasks = [tasks[(start + offset) % n_tasks] for offset in range(k)]
                self.fix_curr_index = (start + k) % n_tasks
        elif self.warmup_strategy == "random":
            # Sampling with replacement: duplicates are possible, and certain if k > num_tasks.
            tasks = self.tasks
            indices = random.choices(range(self.num_tasks), k=k)
            sampled_tasks = [tasks[idx] for idx in indices]

    self.startup_sampled_tasks += k
    return sampled_tasks

def sample(self, k: int = 1) -> Union[List, Any]:
"""Sample k tasks from the curriculum.
Expand All @@ -190,14 +233,20 @@ def sample(self, k: int = 1) -> Union[List, Any]:
assert self.num_tasks > 0, "Task space is empty. Please add tasks to the curriculum before sampling."

if self._should_use_startup_sampling():
return self._startup_sample()

# Use list of indices because np.choice does not play nice with tuple tasks
# tasks = self.tasks
n_tasks = self.num_tasks
tasks = self._startup_sample(k)
# Check if the startup sampling has satisfied the request or if there's no progress (no tasks returned)
if len(tasks) > 0 and len(tasks) < k: # Check if we need to add more tasks
additional_tasks = self.sample(k=k-len(tasks))
tasks.extend(additional_tasks)
return tasks

task_dist = self._sample_distribution()
task_idx = np.random.choice(list(range(n_tasks)), size=k, p=task_dist)
return task_idx

# Normal sampling process
tasks = self.tasks
n_tasks = len(tasks)
task_idx = np.random.choice(range(n_tasks), size=k, p=task_dist)
return [tasks[i] for i in task_idx]

def log_metrics(self, writer, step=None, log_full_dist=False):
"""Log the task distribution to the provided tensorboard writer.
Expand Down
7 changes: 5 additions & 2 deletions syllabus/core/environment_sync_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,13 @@ def reset(self, *args, **kwargs):
added_tasks = message["added_tasks"]
for add_task in added_tasks:
self.env.add_task(add_task)
return self.env.reset(*args, new_task=next_task, **kwargs)
obs, info = self.env.reset(*args, new_task=next_task, **kwargs)
info["task"] = self.task_space.encode(self.get_task())
return obs, info

def step(self, action):
obs, rew, term, trunc, info = step_api_compatibility(self.env.step(action), output_truncation_bool=True)
info["task"] = self.task_space.encode(self.get_task())
self.episode_length += 1
self.episode_return += rew
self.task_progress = info.get("task_completion", 0.0)
Expand Down Expand Up @@ -339,4 +342,4 @@ def add_task(self, task):
def __getattr__(self, attr):
env_attr = getattr(self.env, attr, None)
if env_attr:
return env_attr
return env_attr
Binary file added syllabus/curricula/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion syllabus/curricula/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
from .plr.central_plr_wrapper import CentralizedPrioritizedLevelReplay
from .plr.plr_wrapper import PrioritizedLevelReplay
from .plr.task_sampler import TaskSampler
from .sequential import SequentialCurriculum
from .sequential import SequentialCurriculum, Condition
from .simple_box import SimpleBoxCurriculum
from .annealing_box import AnnealingBoxCurriculum
3 changes: 3 additions & 0 deletions syllabus/curricula/annealing_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def sample(self, k: int = 1) -> Union[List, Any]:
Sample k tasks from the curriculum.
"""
# Linear annealing from start_values to end_values
if self._should_use_startup_sampling():
return self._startup_sample(k)

annealed_values = (
self.start_values + (self.end_values - self.start_values) *
np.minimum(self.current_step, self.total_steps) / self.total_steps
Expand Down
6 changes: 3 additions & 3 deletions syllabus/curricula/plr/central_plr_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ def _sample_distribution(self) -> List[float]:
def sample(self, k: int = 1) -> Union[List, Any]:
self.num_samples += 1
if self._should_use_startup_sampling():
return self._startup_sample()
else:
return [self._task_sampler.sample() for _ in range(k)]
return self._startup_sample(k)

return [self._task_sampler.sample() for _ in range(k)]

def _enumerate_tasks(self, space):
assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete"
Expand Down
6 changes: 3 additions & 3 deletions syllabus/curricula/plr/plr_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,9 @@ def _sample_distribution(self) -> List[float]:

def sample(self, k: int = 1) -> Union[List, Any]:
if self._should_use_startup_sampling():
return self._startup_sample()
else:
return [self._task_sampler.sample() for _ in range(k)]
return self._startup_sample(k)

return [self._task_sampler.sample() for _ in range(k)]

def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None:
"""
Expand Down
2 changes: 2 additions & 0 deletions syllabus/curricula/plr/task_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ def sample_weights(self):
self.staleness_temperature,
self.task_staleness,
)
if np.isclose(np.sum(staleness_weights), 0):
staleness_weights = np.ones_like(staleness_weights, dtype=float) / len(staleness_weights)
staleness_weights = staleness_weights * (1 - self.unseen_task_weights)
z = np.sum(staleness_weights)
if z > 0:
Expand Down
Loading