diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 00000000..46a561d6
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,9 @@
+[codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = .git*,*.svg,package-lock.json,*-lock.yaml,*.lock,*.css,.codespellrc,playground,*.jsonl,.cache,*/math.json,*setup.cfg
+check-hidden = true
+# Ignore embedded images, camelCase/PascalCase identifiers, and URLs
+ignore-regex = ^\s*"image/\S+": ".*|\b[a-z]+[A-Z]\w*\b|\b[A-Z][a-z]+[A-Z]\w*\b|https?://\S+
+# Domain-specific terms and variable names that are not typos
+# ot: file extension, fro: Frobenius norm in PyTorch, alse: part of regex (F|f)alse, eles: variable name (elements)
+ignore-words-list = ans,rouge,aci,nd,medias,te,ot,fro,alse,eles
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 00000000..c59e0473
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within .codespellrc
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/.gitignore b/.gitignore
index 75099947..44b23a14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,4 +24,5 @@ wheels/
 **/.claude/
 **/workspace/
 **/CLAUDE.md
-**/logs/
\ No newline at end of file
+**/logs/
+.npm/
diff --git a/README.md b/README.md
index 8f53deaa..0b44d347 100644
--- a/README.md
+++ b/README.md
@@ -260,7 +260,7 @@ For training, please refer to [`./deepanalyze/ms-swift/requirements.txt`](./deep
 answer = deepanalyze.generate(prompt, workspace=workspace)
 print(answer["reasoning"])
 ```
-You shoud get a deep research report, which can be rendered as a PDF.:
+You should get a deep research report, which can be rendered as a PDF:
 
 ```text
 # Comprehensive Analysis of Student Enrollment Patterns and Institutional Transfers
diff --git a/deepanalyze/SkyRL/skyagent/README.md b/deepanalyze/SkyRL/skyagent/README.md
index f86bb306..30ae8eb8 100644
--- a/deepanalyze/SkyRL/skyagent/README.md
+++ b/deepanalyze/SkyRL/skyagent/README.md
@@ -2,7 +2,7 @@
 
 SkyAgent is a generic agent layer for training and evaluating agents.
 
-SkyAgent is designed primarly for researchers to have a unified interface around implementing agentic tasks. A modular design allows researchers to
+SkyAgent is designed primarily for researchers to have a unified interface around implementing agentic tasks. A modular design allows researchers to
 1. bring in their own tasks
 2. use any training backend or simply run evaluation
 3. modify runtime implementation for a given task
diff --git a/deepanalyze/SkyRL/skyagent/skyagent/agents/base.py b/deepanalyze/SkyRL/skyagent/skyagent/agents/base.py
index f7427391..d11833fa 100644
--- a/deepanalyze/SkyRL/skyagent/skyagent/agents/base.py
+++ b/deepanalyze/SkyRL/skyagent/skyagent/agents/base.py
@@ -339,7 +339,7 @@ def _post_process_results(
         has_finish_action_list.append(result.get("finish", False))
         finish_reason_list.append(result.get("finish_reason", None))
 
-        # Encode messages, get assitant mask and position ids
+        # Encode messages, get assistant mask and position ids
         prompt_encodings = self.tokenizer.apply_chat_template(
             all_prompts,
             # return_tensors="pt",
diff --git a/deepanalyze/SkyRL/skyagent/skyagent/tasks/general_react/utils.py b/deepanalyze/SkyRL/skyagent/skyagent/tasks/general_react/utils.py
index d08758e5..4bb550ca 100644
--- a/deepanalyze/SkyRL/skyagent/skyagent/tasks/general_react/utils.py
+++ b/deepanalyze/SkyRL/skyagent/skyagent/tasks/general_react/utils.py
@@ -26,7 +26,7 @@ def get_instruction(cls, instance: Dict[str, Any]) -> str:
         system_prompt = {
             "role": "system",
             "content": "Please solve the problem with the following tools and return the final answer inside the finish tool. \
-            If there are additional requirments such as the answer should be included inside \\boxed{}, please return the answer in the format of \
+            If there are additional requirements such as the answer should be included inside \\boxed{}, please return the answer in the format of \
             \
             \\boxed{'The final answer goes here.'} \
             "
diff --git a/deepanalyze/SkyRL/skyagent/skyagent/tasks/swebench/utils.py b/deepanalyze/SkyRL/skyagent/skyagent/tasks/swebench/utils.py
index e440f43a..e01e3154 100644
--- a/deepanalyze/SkyRL/skyagent/skyagent/tasks/swebench/utils.py
+++ b/deepanalyze/SkyRL/skyagent/skyagent/tasks/swebench/utils.py
@@ -85,10 +85,10 @@ def _get_swebench_workspace_dir_name(instance: pd.Series, dataset: str) -> str:
 
 # Phase 1. READING: read the problem and reword it in clearer terms
 # 1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
-# 1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
+# 1.2 Highlight error messages, method names, variables, file names, stack traces, and technical details.
 # 1.3 Explain the problem in clear terms.
 # 1.4 Enumerate the steps to reproduce the problem.
-# 1.5 Hightlight any best practices to take into account when testing and fixing the issue
+# 1.5 Highlight any best practices to take into account when testing and fixing the issue
 
 # Phase 2. RUNNING: install and run the tests on the repository
 # 2.1 Follow the readme
diff --git a/deepanalyze/SkyRL/skyagent/skyagent/tasks/verifiers/naive_dapo.py b/deepanalyze/SkyRL/skyagent/skyagent/tasks/verifiers/naive_dapo.py
index d26a1dd7..a86fac86 100644
--- a/deepanalyze/SkyRL/skyagent/skyagent/tasks/verifiers/naive_dapo.py
+++ b/deepanalyze/SkyRL/skyagent/skyagent/tasks/verifiers/naive_dapo.py
@@ -506,7 +506,7 @@ def compute_score(solution_str: str,
     if "\\pi" in extracted_model_output or "\\pi" in ground_truth:
         equivs = []
         for pi in [math.pi, 3.14]:
-            equivs.append(math_equal(extracted_model_output, ground_truth, tiemout=True, pi=pi))
+            equivs.append(math_equal(extracted_model_output, ground_truth, timeout=True, pi=pi))
         correct = any(equivs)
     else:
         correct = math_equal(extracted_model_output, ground_truth, timeout=True)
diff --git a/deepanalyze/SkyRL/skyagent/skyagent/tools/prompt.py b/deepanalyze/SkyRL/skyagent/skyagent/tools/prompt.py
index 3716d1cf..72615f77 100644
--- a/deepanalyze/SkyRL/skyagent/skyagent/tools/prompt.py
+++ b/deepanalyze/SkyRL/skyagent/skyagent/tools/prompt.py
@@ -11,5 +11,5 @@
 2. **Key Extraction for Evidence**: Identify and extract the **most relevant information** from the content, you never miss any important information, output the **full original context** of the content as far as possible, it can be more than three paragraphs.
 3. **Summary Output for Summary**: Organize into a concise paragraph with logical flow, prioritizing clarity and judge the contribution of the information to the goal.
 
-**Final Output Format using JSON format has "rational", "evidence", "summary" feilds**
+**Final Output Format using JSON format has "rational", "evidence", "summary" fields**
 """
diff --git a/deepanalyze/SkyRL/skyrl-gym/skyrl_gym/envs/lcb/livecodebench.py b/deepanalyze/SkyRL/skyrl-gym/skyrl_gym/envs/lcb/livecodebench.py
index 2246ad68..a84d9b18 100644
--- a/deepanalyze/SkyRL/skyrl-gym/skyrl_gym/envs/lcb/livecodebench.py
+++ b/deepanalyze/SkyRL/skyrl-gym/skyrl_gym/envs/lcb/livecodebench.py
@@ -83,7 +83,7 @@ class TimeoutException(Exception):
 
 
 def timeout_handler(signum, frame):
-    print("timeout occured: alarm went off")
+    print("timeout occurred: alarm went off")
     raise TimeoutException
 
 
@@ -195,7 +195,7 @@ def compile_code(code: str, timeout: int):
             # else condition allows future extensibility to other platforms
             compiled_sol = tmp_sol.Solution()
         else:
-            # do nothing in the other case since function is accesible
+            # do nothing in the other case since function is accessible
             compiled_sol = tmp_sol
 
     assert compiled_sol is not None
@@ -389,9 +389,9 @@ def grade_stdio(
         if stripped_prediction_line == stripped_gt_out_line:
             continue
 
-        ## CASE 2: element-wise comparision
+        ## CASE 2: element-wise comparison
         ## if there are floating elements
-        ## use `decimal` library for good floating point comparision
+        ## use `decimal` library for good floating point comparison
         ## otherwise gotcha: np.isclose(50000000000000000, 50000000000000001) = True
         ## note that we should always be able to convert to decimals
diff --git a/deepanalyze/SkyRL/skyrl-train/docs/configuration/config.rst b/deepanalyze/SkyRL/skyrl-train/docs/configuration/config.rst
index 7dab48e9..223574d5 100644
--- a/deepanalyze/SkyRL/skyrl-train/docs/configuration/config.rst
+++ b/deepanalyze/SkyRL/skyrl-train/docs/configuration/config.rst
@@ -468,7 +468,7 @@ Weight Transfer Configuration
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 - ``generator.weight_sync_backend``: Backend to use for weight synchronization. Currently, we support ``nccl`` and ``gloo``.
-- ``generator.override_existing_update_group``: Whether to override the existing update group for the inference engine. This is applicable only for remote inference engines. During training, `skyrl-train` forms a custom process group ("update group") with the rank 0 training worker and all the inference engine ranks. If ``override_existing_update_group=enable``, then during initialization, a previous weight update group will be overriden in the inference engine. For example, if you have a remote server setup and you run training for the same model multiple times, it is helpful to override the previous update group. We recommend leaving this to ``auto`` - since it will automatically determine if the previous update group should be overridden based on ``run_engines_locally``.
+- ``generator.override_existing_update_group``: Whether to override the existing update group for the inference engine. This is applicable only for remote inference engines. During training, `skyrl-train` forms a custom process group ("update group") with the rank 0 training worker and all the inference engine ranks. If ``override_existing_update_group=enable``, then during initialization, a previous weight update group will be overridden in the inference engine. For example, if you have a remote server setup and you run training for the same model multiple times, it is helpful to override the previous update group. We recommend leaving this to ``auto`` - since it will automatically determine if the previous update group should be overridden based on ``run_engines_locally``.
 
 Inference Engine Configuration
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -481,8 +481,8 @@
 - ``generator.vllm_v1_disable_multiproc``: If ``true``, this will set ``VLLM_ENABLE_V1_MULTIPROCESSING=0`` in the environment, which makes the scheduling deterministic. This is useful for reproducibility.
 - ``generator.enable_prefix_caching``: Whether to enable prefix caching for the inference engine. Applicable only when ``backend="vllm"``. This can be left to the default ``true`` in most cases. Note that in the case of remote inference engines, you would need to match the setting used when you initialized the remote servers.
 - ``generator.enable_chunked_prefill``: Whether to enable chunked prefill for the inference engine. Applicable only when ``backend="vllm"``. With vLLM, this can be left to the default ``true`` in most cases.
-- ``generator.max_num_seqs``: Continous batching parameter for vLLM. Maximum number of sequences to pack into a batch.
-- ``generator.max_num_batched_tokens``: Continous batching parameter for vLLM. Maximum number of tokens to pack into a batch.
+- ``generator.max_num_seqs``: Continuous batching parameter for vLLM. Maximum number of sequences to pack into a batch.
+- ``generator.max_num_batched_tokens``: Continuous batching parameter for vLLM. Maximum number of tokens to pack into a batch.
 
 
 Generation Parameters
diff --git a/deepanalyze/SkyRL/skyrl-train/docs/getting-started/development.rst b/deepanalyze/SkyRL/skyrl-train/docs/getting-started/development.rst
index ac03e36e..fd7efe8e 100644
--- a/deepanalyze/SkyRL/skyrl-train/docs/getting-started/development.rst
+++ b/deepanalyze/SkyRL/skyrl-train/docs/getting-started/development.rst
@@ -45,7 +45,7 @@ CPU tests
 GPU tests
 ~~~~~~~~~
 
-The GPU tests require a node with atleast 8 GPUs. They have been tested on a 8xH100 node, but should work even on 8xA100 nodes. We are actively working on making these more accessible.
+The GPU tests require a node with at least 8 GPUs. They have been tested on an 8xH100 node, but should work even on 8xA100 nodes. We are actively working on making these more accessible.
 
 .. code-block:: bash
diff --git a/deepanalyze/SkyRL/skyrl-train/docs/tutorials/tools_guide.rst b/deepanalyze/SkyRL/skyrl-train/docs/tutorials/tools_guide.rst
index 2ebc440c..60776566 100644
--- a/deepanalyze/SkyRL/skyrl-train/docs/tutorials/tools_guide.rst
+++ b/deepanalyze/SkyRL/skyrl-train/docs/tutorials/tools_guide.rst
@@ -12,7 +12,7 @@ Core Concepts
 
 **ToolGroup**: A ``ToolGroup`` is a collection of related tools that share the same context or states. Tool groups enable all tools within the group to access and modify the shared state, such as a shared database connection or cache.
 
-**Environment**: An ``Environment`` is a class that defines the task for the agent to solve, and can integrate one ore more tool groups for the agent to use. See the following doc for more details on how to build an environment: :doc:`new_env`.
+**Environment**: An ``Environment`` is a class that defines the task for the agent to solve, and can integrate one or more tool groups for the agent to use. See the following doc for more details on how to build an environment: :doc:`new_env`.
 
 
 ToolGroup and the @tool Decorator
@@ -84,14 +84,14 @@ Search ToolGroup
 Environment Integration
 ------------------------
 
-Tools groups can be integrated into any environment in SkyGym-RL. The base environment class for text-based environments is ``BaseTextEnv``, which provides simple utilities for managing and using multiple tool groups in a single envrionment.
+Tool groups can be integrated into any environment in SkyGym-RL. The base environment class for text-based environments is ``BaseTextEnv``, which provides simple utilities for managing and using multiple tool groups in a single environment.
 
 The following sub-sections walk through integrating and using tools in an environment.
 
 Tool Initialization
 ~~~~~~~~~~~~~~~~~~~
 
-To incorporate tools into an envrionment, first build and initialize the tool groups during environment construction:
+To incorporate tools into an environment, first build and initialize the tool groups during environment construction:
 
 .. code-block:: python
 
@@ -112,7 +112,7 @@ To incorporate tools into an envrionment, first build and initialize the tool gr
 Tool Execution
 ~~~~~~~~~~~~~~
 
-To use a tool and get the result, you can call the ``_execute_tool`` (provided by ``BaseTextEnv``) method with the tool group name, tool name, and the tool input. Tools are most often used in the envrionment ``step`` method.
+To use a tool and get the result, you can call the ``_execute_tool`` (provided by ``BaseTextEnv``) method with the tool group name, tool name, and the tool input. Tools are most often used in the environment ``step`` method.
 
 .. code-block:: python
diff --git a/deepanalyze/SkyRL/skyrl-train/import_utils.py b/deepanalyze/SkyRL/skyrl-train/import_utils.py
index 991ea32d..d85d85e0 100644
--- a/deepanalyze/SkyRL/skyrl-train/import_utils.py
+++ b/deepanalyze/SkyRL/skyrl-train/import_utils.py
@@ -2783,7 +2783,7 @@ def propagate_frozenset(unordered_import_structure):
 
         else:
             # If k is not a frozenset, it means that the dictionary is not "level": some keys (top-level)
-            # are frozensets, whereas some are not -> frozenset keys are at an unkown depth-level of the
+            # are frozensets, whereas some are not -> frozenset keys are at an unknown depth-level of the
            # dictionary.
             #
             # We recursively propagate the frozenset for this specific dictionary so that the frozensets
diff --git a/deepanalyze/SkyRL/skyrl-train/pyproject.toml b/deepanalyze/SkyRL/skyrl-train/pyproject.toml
index 3c602048..19bf50d1 100644
--- a/deepanalyze/SkyRL/skyrl-train/pyproject.toml
+++ b/deepanalyze/SkyRL/skyrl-train/pyproject.toml
@@ -53,7 +53,7 @@ skyrl-gym = { path = "./skyrl-gym" , editable = true }
 torch = { index = "pytorch-cu128" }
 torchvision = { index = "pytorch-cu128" }
 flash-attn = { url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.7cxx11abiFALSE-cp312-cp312-linux_x86_64.whl" }
-# NOTE (sumanthrh): We explictly use a flashinfer wheel from their index.
+# NOTE (sumanthrh): We explicitly use a flashinfer wheel from their index.
 # The wheels on PyPI don't come with pre-compiled kernels and the package will JIT compile them at runtime which is slow.
 # additionally, different inference engines may pin different compatible flashinfer versions, so we provide the option to pin different versions for vllm/sglang
 flashinfer-python = [
diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/dataset/replay_buffer.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/dataset/replay_buffer.py
index aaf455fb..3ef8b0e3 100644
--- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/dataset/replay_buffer.py
+++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/dataset/replay_buffer.py
@@ -48,7 +48,7 @@ class Experience:
     base_action_log_probs: (B, A)
     values: (B, A)
     returns: (B, A)
-    advatanges: (B, A)
+    advantages: (B, A)
     attention_mask: (B, S)
     action_mask: (B, A)
     kl: (B, A)
@@ -124,7 +124,7 @@ class BufferItem:
     base_action_log_probs: (A)
     values: (1)
     returns: (1)
-    advatanges: (1)
+    advantages: (1)
     attention_mask: (S)
     loss_mask: (A)
     action_mask: (A)
diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/deepspeed_strategy.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/deepspeed_strategy.py
index 5284efd6..842b311a 100644
--- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/deepspeed_strategy.py
+++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/deepspeed_strategy.py
@@ -78,7 +78,7 @@ def setup_distributed(self, timeout=timedelta(minutes=30)) -> None:
         if local_rank != -1:
             torch.cuda.set_device(local_rank)
 
-        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
+        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
         deepspeed.init_distributed(timeout=timeout)
         self.world_size = dist.get_world_size()
         self.accumulated_gradient = (
diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/ulysses/monkey_patch.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/ulysses/monkey_patch.py
index cf320af6..98fd1d15 100644
--- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/ulysses/monkey_patch.py
+++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/distributed/ulysses/monkey_patch.py
@@ -139,7 +139,7 @@ def apply_monkey_patch(
    ), f"num_attention_heads {num_attention_heads} must be divisible by ulysses_sp_size {ulysses_sp_size}"
    assert (
        num_key_value_heads % ulysses_sp_size == 0 or ulysses_sp_size % num_key_value_heads == 0
-    ), f"num_key_value_heads {num_key_value_heads} must be divisible by ulysses_sp_size {ulysses_sp_size}or vise versa. Upon ulysses_sp_size % num_key_value_heads == 0,kv heads are repeated to ensure correctness."
+ ), f"num_key_value_heads {num_key_value_heads} must be divisible by ulysses_sp_size {ulysses_sp_size}or vice versa. Upon ulysses_sp_size % num_key_value_heads == 0,kv heads are repeated to ensure correctness." # transformers<=4.47.1 if use_remove_padding or ulysses_sp_size > 1: if hasattr(module, "_flash_attention_forward"): diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/entrypoints/main_base.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/entrypoints/main_base.py index 75607d50..ec8263e5 100644 --- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/entrypoints/main_base.py +++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/entrypoints/main_base.py @@ -128,7 +128,7 @@ def get_train_dataset(self): # make sure the dataset is large enough to train on assert ( len(prompts_dataset) >= self.cfg.trainer.train_batch_size - ), f"dataset should be atleast as large as `train_batch_size` {self.cfg.trainer.train_batch_size}, got size {len(prompts_dataset)}" + ), f"dataset should be at least as large as `train_batch_size` {self.cfg.trainer.train_batch_size}, got size {len(prompts_dataset)}" return prompts_dataset def get_eval_dataset(self): diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/inference_engines/vllm/vllm_engine.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/inference_engines/vllm/vllm_engine.py index d847b952..3b76f859 100644 --- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/inference_engines/vllm/vllm_engine.py +++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/inference_engines/vllm/vllm_engine.py @@ -286,7 +286,7 @@ async def update_named_weights(self, request: NamedWeightsUpdateRequest): raise ValueError(f"Expected update weight request with 'names' entry, got keys: {request.keys()}") if not len(request["names"]): - raise ValueError("Update weight request should have atleast one entry in 'names'") + raise ValueError("Update weight request should have at least one entry in 'names'") engine = self._get_engine() # Use IPC if handles are provided @@ -387,7 +387,7 @@ async def update_named_weights(self, request: NamedWeightsUpdateRequest): raise ValueError(f"Expected update weight request with 'names' entry, got keys: {request.keys()}") if not len(request["names"]): - raise ValueError("Update weight request should have atleast one entry in 'names'") + raise ValueError("Update weight request should have at least one entry in 'names'") engine = self._get_engine() # Use IPC if handles are provided @@ -491,7 +491,7 @@ async def _destroy_weights_update_group(self): # raise ValueError(f"Expected update weight request with 'names' entry, got keys: {request.keys()}") # if not len(request["names"]): -# raise ValueError("Update weight request should have atleast one entry in 'names'") +# raise ValueError("Update weight request should have at least one entry in 'names'") # engine = self._get_engine() # # Use IPC if handles are provided diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/models.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/models.py index 505f3c8e..7acd10e6 100644 --- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/models.py +++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/models.py @@ -831,8 +831,8 @@ def get_llm_for_sequence_regression( # https://github.com/huggingface/transformers/issues/26877 model.config.use_cache = False - # NOTE: For reward model training only, intialize value_head manually - # because deepspeed.zero.Init() will not intialize them. + # NOTE: For reward model training only, initialize value_head manually + # because deepspeed.zero.Init() will not initialize them. 
         # TODO: Find a better way to clarify reward model training.
         if init_value_head:
             value_head = getattr(model, value_head_prefix)
diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/ppo_utils.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/ppo_utils.py
index ad77252e..61f557d1 100644
--- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/ppo_utils.py
+++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/ppo_utils.py
@@ -327,7 +327,7 @@ def register(cls, name: Union[str, StrEnum], func: Callable):
         If ray is initialized, this function will get or create a named ray actor (RegistryActor)
         for the registry, and sync the registry to the actor.
 
-        If ray is not initalized, the function will be stored in the local registry only.
+        If ray is not initialized, the function will be stored in the local registry only.
         To make sure all locally registered functions are available to all ray processes,
         call sync_with_actor() after ray.init().
 
diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/torch_utils.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/torch_utils.py
index 27f3d48f..24352a14 100644
--- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/torch_utils.py
+++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/torch_utils.py
@@ -154,7 +154,7 @@ def logprobs_from_logits_v2(
             logits_labels - logsumexp_values
         )  # log_softmax(x_i) = x_i - logsumexp(x)
     else:
-        # logsumexp approach is unstable with bfloat16, fall back to slightly less efficent approach
+        # logsumexp approach is unstable with bfloat16, fall back to slightly less efficient approach
         logprobs_labels = []
         for row_logits, row_labels in zip(
             logits, labels
diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/utils.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/utils.py
index 7ab36d34..0670ef18 100644
--- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/utils.py
+++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/utils/utils.py
@@ -54,7 +54,7 @@ def validate_batch_sizes(cfg: DictConfig):
     1. Each prompt in train_batch_size creates `n_samples_per_prompt` total samples.
     2. During training, these samples are split across data parallel (DP) workers, making the effective per-GPU batch size: `train_batch_size * n_samples_per_prompt / dp_size`.
     3. Mini batches are similarly normalized to per-gpu mini batches with size: `mini_batch_size * n_samples_per_prompt / dp_size`.
-    4. Per-gpu train batch size must be divisble by per-gpu mini batch size, otherwise the last mini batch will be incomplete.
+    4. Per-gpu train batch size must be divisible by per-gpu mini batch size, otherwise the last mini batch will be incomplete.
     5. Per-gpu mini batch size must be divisible by per-gpu micro batch size, otherwise the last micro batch will be incomplete.
""" assert cfg.trainer.train_batch_size >= cfg.trainer.policy_mini_batch_size @@ -305,7 +305,7 @@ def validate_cfg(cfg: DictConfig): if not cfg.generator.batched: raise ValueError( - "Gneration with `trainer.algorithm.use_tis` needs to be batched with only single turn generation" + "Generation with `trainer.algorithm.use_tis` needs to be batched with only single turn generation" ) if cfg.generator.sampling_params.logprobs is not None: diff --git a/deepanalyze/SkyRL/skyrl-train/skyrl_train/workers/worker.py b/deepanalyze/SkyRL/skyrl-train/skyrl_train/workers/worker.py index 8fe9ea0e..30e0ed01 100644 --- a/deepanalyze/SkyRL/skyrl-train/skyrl_train/workers/worker.py +++ b/deepanalyze/SkyRL/skyrl-train/skyrl_train/workers/worker.py @@ -329,7 +329,7 @@ async def init_weight_sync_state( # NOTE (sumanthrh): This doesn't work yet, and is thus commented out. # The better way is to just have this specified in __del__, but there is # no guarattee that __del__ will be called in general. Ray also doesn't - # explictly call __del__ when the actor shuts down. + # explicitly call __del__ when the actor shuts down. # It's commented out so that we can fix this in the future. # atexit.register(self._handle_termination) diff --git a/deepanalyze/SkyRL/skyrl-train/tests/gpu/test_policy_local_engines_e2e.py b/deepanalyze/SkyRL/skyrl-train/tests/gpu/test_policy_local_engines_e2e.py index 0e68b964..4ea08452 100644 --- a/deepanalyze/SkyRL/skyrl-train/tests/gpu/test_policy_local_engines_e2e.py +++ b/deepanalyze/SkyRL/skyrl-train/tests/gpu/test_policy_local_engines_e2e.py @@ -131,7 +131,7 @@ def test_policy_local_engines_e2e( colocate_all, weight_sync_backend, strategy, backend, tp_size ): """ - Tests initalizing the policy actor group and inference engine, syncing weights, and performing generation. + Tests initializing the policy actor group and inference engine, syncing weights, and performing generation. """ try: cfg = get_test_actor_config() diff --git "a/deepanalyze/ms-swift/docs/source/Instruction/GRPO/DeveloperGuide/\345\245\226\345\212\261\346\250\241\345\236\213.md" "b/deepanalyze/ms-swift/docs/source/Instruction/GRPO/DeveloperGuide/\345\245\226\345\212\261\346\250\241\345\236\213.md" index 57bbccbb..e6aae0b0 100644 --- "a/deepanalyze/ms-swift/docs/source/Instruction/GRPO/DeveloperGuide/\345\245\226\345\212\261\346\250\241\345\236\213.md" +++ "b/deepanalyze/ms-swift/docs/source/Instruction/GRPO/DeveloperGuide/\345\245\226\345\212\261\346\250\241\345\236\213.md" @@ -47,7 +47,7 @@ class RMlugin(DefaultRMPlugin): def __init__(self, model, template): super().__init__(model, template) - # initilize PTEngine to infer + # initialize PTEngine to infer self.engine = PtEngine.from_model_template(self.model, self.template, max_batch_size=0) ... 
diff --git "a/deepanalyze/ms-swift/docs/source/Instruction/\345\270\270\350\247\201\351\227\256\351\242\230\346\225\264\347\220\206.md" "b/deepanalyze/ms-swift/docs/source/Instruction/\345\270\270\350\247\201\351\227\256\351\242\230\346\225\264\347\220\206.md" index 9cf6479b..b313824f 100644 --- "a/deepanalyze/ms-swift/docs/source/Instruction/\345\270\270\350\247\201\351\227\256\351\242\230\346\225\264\347\220\206.md" +++ "b/deepanalyze/ms-swift/docs/source/Instruction/\345\270\270\350\247\201\351\227\256\351\242\230\346\225\264\347\220\206.md" @@ -190,7 +190,7 @@ swift3.0没这个参数了,用`strict`参数。 ### Q59: 运行sft命令出现报错如下: ```text -RuntimeError: Expected to mark a variable ready only once.This error is caused by one of the following reasons: 1) Use of a module parameter outsid forward function. Please make sure model parameters are not shared across multiple concurrent forward-backward passes. or try to use _set_static_graph( ) as round if this module graph does not change during training loop.2) Reused parameters in multiple reentrant backward passes. For example, if you use multiple oint` functions to wrap the same part of your model, it would result in the same set of parameters been used by different reentrant backward passes multiple and hence marking a variable ready multiple times. DDP does not support such use cases in default. You can try to use _set_static_graph( ) as a workaround if dule graph does not change over iterations. +RuntimeError: Expected to mark a variable ready only once.This error is caused by one of the following reasons: 1) Use of a module parameter outside forward function. Please make sure model parameters are not shared across multiple concurrent forward-backward passes. or try to use _set_static_graph( ) as round if this module graph does not change during training loop.2) Reused parameters in multiple reentrant backward passes. For example, if you use multiple checkpoint` functions to wrap the same part of your model, it would result in the same set of parameters been used by different reentrant backward passes multiple and hence marking a variable ready multiple times. DDP does not support such use cases in default. You can try to use _set_static_graph( ) as a workaround if dule graph does not change over iterations. ``` 加一下这个参数,`--gradient_checkpointing_kwargs '{"use_reentrant": false}'`。 @@ -586,7 +586,7 @@ CUDA_VISIBLE_DEVICES=0 NPROC_PER_NODE=1 MAX_PIXELS=1003520 swift sft --model Qwe ### Q37: 请问swift支持embedding模型的推理吗?出现如下报错了 ```text -[rank0]:[W511 17:18:01.815062493ProcessGroupNCCL.cpp:1250]Warning: WARNING: process group has NOT been destroyed before we destruct Proc essGroupNCCL. On normal program exit, the application should call des troy_process_group to ensure that any pendingNCCL operations have fi nished in this process. In rare cases this process can exit before th is point and block the progress of another member of the process grou p. This constraint has always been present, but this warning has onl y been added since PyTorch 2.4 (function operator( )) +[rank0]:[W511 17:18:01.815062493ProcessGroupNCCL.cpp:1250]Warning: WARNING: process group has NOT been destroyed before we destruct Proc essGroupNCCL. On normal program exit, the application should call des troy_process_group to ensure that any pendingNCCL operations have fi nished in this process. In rare cases this process can exit before th is point and block the progress of another member of the process grou p. 
This constraint has always been present, but this warning has only y been added since PyTorch 2.4 (function operator( )) ``` embedding模型推理请使用官方模型代码,swift还没支持。 diff --git a/deepanalyze/ms-swift/docs/source_en/Instruction/Frequently-asked-questions.md b/deepanalyze/ms-swift/docs/source_en/Instruction/Frequently-asked-questions.md index 72b65471..9f9d6940 100644 --- a/deepanalyze/ms-swift/docs/source_en/Instruction/Frequently-asked-questions.md +++ b/deepanalyze/ms-swift/docs/source_en/Instruction/Frequently-asked-questions.md @@ -190,7 +190,7 @@ This parameter no longer exists in swift3.0, use the `strict` parameter instead. ### Q59: Getting this error when running sft command: ```text -RuntimeError: Expected to mark a variable ready only once.This error is caused by one of the following reasons: 1) Use of a module parameter outsid forward function. Please make sure model parameters are not shared across multiple concurrent forward-backward passes. or try to use _set_static_graph( ) as round if this module graph does not change during training loop.2) Reused parameters in multiple reentrant backward passes. For example, if you use multiple oint` functions to wrap the same part of your model, it would result in the same set of parameters been used by different reentrant backward passes multiple and hence marking a variable ready multiple times. DDP does not support such use cases in default. You can try to use _set_static_graph( ) as a workaround if dule graph does not change over iterations. +RuntimeError: Expected to mark a variable ready only once.This error is caused by one of the following reasons: 1) Use of a module parameter outside forward function. Please make sure model parameters are not shared across multiple concurrent forward-backward passes. or try to use _set_static_graph( ) as round if this module graph does not change during training loop.2) Reused parameters in multiple reentrant backward passes. For example, if you use multiple checkpoint` functions to wrap the same part of your model, it would result in the same set of parameters been used by different reentrant backward passes multiple and hence marking a variable ready multiple times. DDP does not support such use cases in default. You can try to use _set_static_graph( ) as a workaround if dule graph does not change over iterations. ``` Add this parameter: `--gradient_checkpointing_kwargs '{"use_reentrant": false}'`. diff --git a/deepanalyze/ms-swift/docs/source_en/Instruction/GRPO/DeveloperGuide/reward_model.md b/deepanalyze/ms-swift/docs/source_en/Instruction/GRPO/DeveloperGuide/reward_model.md index c6b7045d..7cc1e72c 100644 --- a/deepanalyze/ms-swift/docs/source_en/Instruction/GRPO/DeveloperGuide/reward_model.md +++ b/deepanalyze/ms-swift/docs/source_en/Instruction/GRPO/DeveloperGuide/reward_model.md @@ -47,7 +47,7 @@ class RMlugin(DefaultRMPlugin): def __init__(self, model, template): super().__init__(model, template) - # initilize PTEngine to infer + # initialize PTEngine to infer self.engine = PtEngine.from_model_template(self.model, self.template, max_batch_size=0) ... 
diff --git a/deepanalyze/ms-swift/examples/train/grpo/plugin/plugin.py b/deepanalyze/ms-swift/examples/train/grpo/plugin/plugin.py
index d7b1b6ca..84e44baa 100644
--- a/deepanalyze/ms-swift/examples/train/grpo/plugin/plugin.py
+++ b/deepanalyze/ms-swift/examples/train/grpo/plugin/plugin.py
@@ -732,7 +732,7 @@ class CustomizedRMPlugin:
     """
     Customized Reward Model Plugin, same to DefaultRMPlugin
 
-    It assumes that `self.model` is a classification model with a value head(output dimmension 1).
+    It assumes that `self.model` is a classification model with a value head (output dimension 1).
     The first logits value from the model's output is used as the reward score.
     """
 
@@ -757,7 +757,7 @@ class QwenLongPlugin(DefaultRMPlugin):
     # ms_dataset: https://modelscope.cn/datasets/iic/DocQA-RL-1.6K
     def __init__(self, model, template, accuracy_orm=None):
         super().__init__(model, template)
-        # initilize PTEngine to infer
+        # initialize PTEngine to infer
         self.engine = PtEngine.from_model_template(self.model, self.template, max_batch_size=0)  # 0: no limit
         self.request_config = RequestConfig(temperature=0)  # customise your request config here
         self.system = textwrap.dedent("""
diff --git a/deepanalyze/ms-swift/examples/train/rft/rft.py b/deepanalyze/ms-swift/examples/train/rft/rft.py
index 4c4525a4..3b0294ba 100644
--- a/deepanalyze/ms-swift/examples/train/rft/rft.py
+++ b/deepanalyze/ms-swift/examples/train/rft/rft.py
@@ -234,7 +234,7 @@ def main():
         print(f"do sample cost: {(time.time()-ts) / 60:.1f} minutes.", flush=True)
         ts = time.time()
         # if want to train the original dataset with datasets, add the original dataset here
-        # if want to train the original model everytime, change to first_model
+        # if want to train the original model every time, change to first_model
         ckpt = do_train(model, model_type, datasets, i)
         print(f"do train cost: {(time.time() - ts) / 60:.1f} minutes.", flush=True)
         ts = time.time()
diff --git a/deepanalyze/ms-swift/swift/llm/infer/rollout.py b/deepanalyze/ms-swift/swift/llm/infer/rollout.py
index 91b812f8..baa4131c 100644
--- a/deepanalyze/ms-swift/swift/llm/infer/rollout.py
+++ b/deepanalyze/ms-swift/swift/llm/infer/rollout.py
@@ -383,7 +383,7 @@ async def infer(
             requests = RolloutInferRequest(
                 messages=[{"role": "user", "content": ""}]
             )
-            # different seed bewteen vLLM Engine
+            # different seed between vLLM Engine
             if request_config.seed:
                 request_config.seed += i * len(requests)
             kwargs = {
diff --git a/deepanalyze/ms-swift/swift/llm/train/rlhf.py b/deepanalyze/ms-swift/swift/llm/train/rlhf.py
index fdf95d40..8be47ba4 100644
--- a/deepanalyze/ms-swift/swift/llm/train/rlhf.py
+++ b/deepanalyze/ms-swift/swift/llm/train/rlhf.py
@@ -18,7 +18,7 @@ class SwiftRLHF(SwiftSft):
 
     def _prepare_model_tokenizer(self):
         if self.args.sequence_parallel_size > 1:
-            # Duplicate calling is allowd to promise this function will
+            # Duplicate calling is allowed to promise this function will
             # be called before model initializing.
             from swift.trainers.sequence_parallel import sequence_parallel
 
diff --git a/deepanalyze/ms-swift/swift/llm/train/tuner.py b/deepanalyze/ms-swift/swift/llm/train/tuner.py
index 9c7ceabf..568d931b 100644
--- a/deepanalyze/ms-swift/swift/llm/train/tuner.py
+++ b/deepanalyze/ms-swift/swift/llm/train/tuner.py
@@ -353,7 +353,7 @@ def prepare_adapter(
         logger.info(f"reft config: {reft_config}")
         model = Swift.prepare_model(model, {"reft": reft_config})
     elif args.train_type == "bone":
-        # Version loosing
+        # Version losing
        from peft import BoneConfig
 
         bone_config = BoneConfig(
diff --git a/deepanalyze/ms-swift/swift/plugin/rm_plugin.py b/deepanalyze/ms-swift/swift/plugin/rm_plugin.py
index 92783836..16545236 100644
--- a/deepanalyze/ms-swift/swift/plugin/rm_plugin.py
+++ b/deepanalyze/ms-swift/swift/plugin/rm_plugin.py
@@ -17,7 +17,7 @@ class DefaultRMPlugin:
     Default Reward Model Plugin
 
     This class implements the default processing logic for reward models.
-    It assumes that `self.model` is a classification model with a value head(output dimmension 1).
+    It assumes that `self.model` is a classification model with a value head (output dimension 1).
     The first logits value from the model's output is used as the reward score.
     """
@@ -54,7 +54,7 @@ def __init__(self, model, template):
         """
         super().__init__(model, template)
 
-        # initilize PTEngine to infer
+        # initialize PTEngine to infer
         self.engine = PtEngine.from_model_template(
             self.model, self.template, max_batch_size=0
         )  # 0: no limit
diff --git a/example/analysis_on_student_loan/README.md b/example/analysis_on_student_loan/README.md
index e5968864..ea64d6ee 100644
--- a/example/analysis_on_student_loan/README.md
+++ b/example/analysis_on_student_loan/README.md
@@ -1,4 +1,4 @@
-# Open-ended Data Reseach on Student Loan
+# Open-ended Data Research on Student Loan
 
 ## Input
 ```python
diff --git a/scripts/multi_coldstart.sh b/scripts/multi_coldstart.sh
index 5fee320d..f5dba2af 100644
--- a/scripts/multi_coldstart.sh
+++ b/scripts/multi_coldstart.sh
@@ -13,18 +13,18 @@ swift sft \
     --model "${MODEL_SINGLE_ABILITY_PATH}" \
     --train_type "full" \
     --dataset \
-        "${DATA_DIR}/interation/data_pipeline_3601.json" \
-        "${DATA_DIR}/interation/data_preparation_3311.json" \
-        "${DATA_DIR}/interation/data_cleaning_1616.json" \
-        "${DATA_DIR}/interation/data_analysis_3936.json" \
-        "${DATA_DIR}/interation/data_insight_1062.json" \
-        "${DATA_DIR}/interation/research_database_818.json" \
-        "${DATA_DIR}/interation/research_xlsx_848.json" \
-        "${DATA_DIR}/interation/research_other_3505.json" \
-        "${DATA_DIR}/interation/research_data_preparation_488.json" \
-        "${DATA_DIR}/interation/research_data_analysis_1339.json" \
-        "${DATA_DIR}/interation/research_data_insight_1351.json" \
-        "${DATA_DIR}/interation/research_report_generation_4327.json" \
+        "${DATA_DIR}/iteration/data_pipeline_3601.json" \
+        "${DATA_DIR}/iteration/data_preparation_3311.json" \
+        "${DATA_DIR}/iteration/data_cleaning_1616.json" \
+        "${DATA_DIR}/iteration/data_analysis_3936.json" \
+        "${DATA_DIR}/iteration/data_insight_1062.json" \
+        "${DATA_DIR}/iteration/research_database_818.json" \
+        "${DATA_DIR}/iteration/research_xlsx_848.json" \
+        "${DATA_DIR}/iteration/research_other_3505.json" \
+        "${DATA_DIR}/iteration/research_data_preparation_488.json" \
+        "${DATA_DIR}/iteration/research_data_analysis_1339.json" \
+        "${DATA_DIR}/iteration/research_data_insight_1351.json" \
+        "${DATA_DIR}/iteration/research_report_generation_4327.json" \
     --torch_dtype "bfloat16" \
     --num_train_epochs 3 \
     --per_device_train_batch_size 1 \
diff --git a/scripts/multi_rl.sh b/scripts/multi_rl.sh
index 603643d8..5a8c5547 100644
--- a/scripts/multi_rl.sh
+++ b/scripts/multi_rl.sh
@@ -13,7 +13,7 @@ python -m examples.deepanalyze.main_deepanalyze \
     data.train_data="[
         \"${DATA_DIR}/RL/qa.parquet\",
         \"${DATA_DIR}/RL/datatask.parquet\",
-        \"${DATA_DIR}/RL/reseach.parquet\"
+        \"${DATA_DIR}/RL/research.parquet\"
     ]" \
     trainer.policy.model.path="${MODEL_COLDSTART_PATH}" \
     trainer.placement.colocate_all=true \